From 9ac45d26974d88c78ceddd62b4e536fed0bc925d Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 8 Jul 2025 10:08:11 -0400 Subject: [PATCH 001/143] moving error classes/utilities to common module --- pydough/errors/__init__.py | 17 ++++++++ pydough/errors/error_types.py | 42 +++++++++++++++++++ .../errors.py => errors/error_utils.py} | 10 +---- pydough/exploration/explain.py | 2 +- pydough/exploration/term.py | 2 +- pydough/metadata/__init__.py | 2 - .../collections/collection_metadata.py | 7 ++-- .../collections/simple_table_metadata.py | 4 +- pydough/metadata/graphs/graph_metadata.py | 3 +- pydough/metadata/parse.py | 7 ++-- .../properties/cartesian_product_metadata.py | 4 +- .../properties/general_join_metadata.py | 4 +- .../metadata/properties/property_metadata.py | 6 +-- .../properties/scalar_attribute_metadata.py | 2 +- .../properties/simple_join_metadata.py | 6 +-- .../subcollection_relationship_metadata.py | 2 +- .../properties/table_column_metadata.py | 7 ++-- pydough/pydough_operators/base_operator.py | 3 +- .../pydough_operators/operator_registry.py | 3 +- .../type_inference/type_verifier.py | 7 +--- pydough/qdag/__init__.py | 3 +- pydough/qdag/collections/calculate.py | 2 +- pydough/qdag/collections/collection_access.py | 2 +- pydough/qdag/collections/collection_qdag.py | 2 +- pydough/qdag/collections/global_context.py | 2 +- pydough/qdag/collections/order_by.py | 2 +- pydough/qdag/collections/partition_by.py | 2 +- pydough/qdag/collections/partition_child.py | 2 +- pydough/qdag/collections/where.py | 2 +- pydough/qdag/errors.py | 11 ----- .../expressions/back_reference_expression.py | 2 +- .../expressions/child_reference_expression.py | 2 +- pydough/qdag/expressions/column_property.py | 2 +- pydough/qdag/expressions/reference.py | 2 +- pydough/qdag/expressions/sided_reference.py | 2 +- pydough/qdag/node_builder.py | 4 +- pydough/types/array_type.py | 3 +- pydough/types/errors.py | 12 ------ pydough/types/map_type.py | 3 +- 
pydough/types/parse_types.py | 3 +- pydough/types/struct_type.py | 3 +- pydough/unqualified/__init__.py | 2 - pydough/unqualified/errors.py | 11 ----- pydough/unqualified/qualification.py | 2 +- pydough/unqualified/unqualified_node.py | 5 +-- tests/test_metadata_errors.py | 3 +- 46 files changed, 125 insertions(+), 106 deletions(-) create mode 100644 pydough/errors/__init__.py create mode 100644 pydough/errors/error_types.py rename pydough/{metadata/errors.py => errors/error_utils.py} (97%) delete mode 100644 pydough/qdag/errors.py delete mode 100644 pydough/types/errors.py delete mode 100644 pydough/unqualified/errors.py diff --git a/pydough/errors/__init__.py b/pydough/errors/__init__.py new file mode 100644 index 000000000..1a89f4ceb --- /dev/null +++ b/pydough/errors/__init__.py @@ -0,0 +1,17 @@ +""" +Module for error handling in PyDough. +""" + +__all__ = [ + "PyDoughMetadataException", + "PyDoughQDAGException", + "PyDoughTypeException", + "PyDoughUnqualifiedException", +] + +from .error_types import ( + PyDoughMetadataException, + PyDoughQDAGException, + PyDoughTypeException, + PyDoughUnqualifiedException, +) diff --git a/pydough/errors/error_types.py b/pydough/errors/error_types.py new file mode 100644 index 000000000..5951bb2d0 --- /dev/null +++ b/pydough/errors/error_types.py @@ -0,0 +1,42 @@ +""" +Definitions of various exception classes used within PyDough. +""" + +__all__ = [ + "PyDoughMetadataException", + "PyDoughQDAGException", + "PyDoughTypeException", + "PyDoughUnqualifiedException", +] + + +class PyDoughMetadataException(Exception): + """ + Exception raised when there is an error relating to PyDough metadata, such + as an error while parsing/validating the JSON or an ill-formed pattern. 
+ """ + + +class PyDoughUnqualifiedException(Exception): + """ + Exception raised when there is an error relating to the PyDough + unqualified form, such as a Python object that cannot be coerced or an + invalid use of a method that can be caught even without qualification. + """ + + +class PyDoughQDAGException(Exception): + """ + Exception raised when there is an error relating to a PyDough QDAG, such + as malformed arguments/structure, undefined term accesses, singular vs + plural cardinality mismatches, or other errors that can be caught during + qualification. + """ + + +class PyDoughTypeException(Exception): + """ + Exception raised when there is an error relating to PyDough types, such + as malformed inputs to a parametrized type or a string that cannot be + parsed into a type. + """ diff --git a/pydough/metadata/errors.py b/pydough/errors/error_utils.py similarity index 97% rename from pydough/metadata/errors.py rename to pydough/errors/error_utils.py index c4dddb4cd..d2a97dd46 100644 --- a/pydough/metadata/errors.py +++ b/pydough/errors/error_utils.py @@ -1,5 +1,5 @@ """ -The definitions of error-handling utilities for the PyDough metadata module. +The definitions of error-handling utilities used by PyDough """ __all__ = [ @@ -13,7 +13,6 @@ "OrCondition", "PossiblyEmptyListOf", "PossiblyEmptyMapOf", - "PyDoughMetadataException", "PyDoughPredicate", "extract_array", "extract_bool", @@ -33,12 +32,7 @@ from abc import ABC, abstractmethod - -class PyDoughMetadataException(Exception): - """Exception raised when there is an error relating to PyDough metadata, such - as an error while parsing/validating the JSON or an ill-formed pattern. 
- """ - +from .error_types import PyDoughMetadataException ############################################################################### # Predicate Classes diff --git a/pydough/exploration/explain.py b/pydough/exploration/explain.py index 5393c1861..8e16afa90 100644 --- a/pydough/exploration/explain.py +++ b/pydough/exploration/explain.py @@ -8,6 +8,7 @@ import pydough import pydough.pydough_operators as pydop from pydough.configs import PyDoughConfigs +from pydough.errors import PyDoughQDAGException from pydough.metadata.abstract_metadata import AbstractMetadata from pydough.metadata.collections import CollectionMetadata, SimpleTableMetadata from pydough.metadata.graphs import GraphMetadata @@ -33,7 +34,6 @@ PyDoughCollectionQDAG, PyDoughExpressionQDAG, PyDoughQDAG, - PyDoughQDAGException, Reference, SubCollection, TableCollection, diff --git a/pydough/exploration/term.py b/pydough/exploration/term.py index f5ea0437f..4f6e62c20 100644 --- a/pydough/exploration/term.py +++ b/pydough/exploration/term.py @@ -10,6 +10,7 @@ import pydough import pydough.pydough_operators as pydop from pydough.configs import PyDoughConfigs +from pydough.errors import PyDoughQDAGException from pydough.qdag import ( BackReferenceExpression, ChildReferenceExpression, @@ -18,7 +19,6 @@ PyDoughCollectionQDAG, PyDoughExpressionQDAG, PyDoughQDAG, - PyDoughQDAGException, Reference, ) from pydough.unqualified import ( diff --git a/pydough/metadata/__init__.py b/pydough/metadata/__init__.py index 980b07289..00b10dab9 100644 --- a/pydough/metadata/__init__.py +++ b/pydough/metadata/__init__.py @@ -8,7 +8,6 @@ "GeneralJoinMetadata", "GraphMetadata", "PropertyMetadata", - "PyDoughMetadataException", "SimpleJoinMetadata", "SimpleTableMetadata", "SubcollectionRelationshipMetadata", @@ -17,7 +16,6 @@ ] from .collections import CollectionMetadata, SimpleTableMetadata -from .errors import PyDoughMetadataException from .graphs import GraphMetadata from .parse import parse_json_metadata_from_file from 
.properties import ( diff --git a/pydough/metadata/collections/collection_metadata.py b/pydough/metadata/collections/collection_metadata.py index f059798ee..85f36ec15 100644 --- a/pydough/metadata/collections/collection_metadata.py +++ b/pydough/metadata/collections/collection_metadata.py @@ -4,13 +4,13 @@ from abc import abstractmethod -from pydough.metadata.abstract_metadata import AbstractMetadata -from pydough.metadata.errors import ( +from pydough.errors import PyDoughMetadataException +from pydough.errors.error_utils import ( HasType, - PyDoughMetadataException, extract_string, is_valid_name, ) +from pydough.metadata.abstract_metadata import AbstractMetadata from pydough.metadata.graphs import GraphMetadata @@ -253,6 +253,7 @@ def add_properties_from_json(self, properties_json: list) -> None: scalar property that should be parsed and inserted into the collection. """ + from pydough.errors import PyDoughMetadataException from pydough.metadata.properties import TableColumnMetadata for property_json in properties_json: diff --git a/pydough/metadata/collections/simple_table_metadata.py b/pydough/metadata/collections/simple_table_metadata.py index 5dfd747ca..f568cea4e 100644 --- a/pydough/metadata/collections/simple_table_metadata.py +++ b/pydough/metadata/collections/simple_table_metadata.py @@ -3,10 +3,10 @@ table in a relational system. """ -from pydough.metadata.errors import ( +from pydough.errors import PyDoughMetadataException +from pydough.errors.error_utils import ( HasPropertyWith, NoExtraKeys, - PyDoughMetadataException, extract_array, extract_object, extract_string, diff --git a/pydough/metadata/graphs/graph_metadata.py b/pydough/metadata/graphs/graph_metadata.py index 24ea12a14..6456f442b 100644 --- a/pydough/metadata/graphs/graph_metadata.py +++ b/pydough/metadata/graphs/graph_metadata.py @@ -2,8 +2,9 @@ Definition of PyDough metadata for a graph. 
""" +from pydough.errors import PyDoughMetadataException +from pydough.errors.error_utils import HasType, is_valid_name from pydough.metadata.abstract_metadata import AbstractMetadata -from pydough.metadata.errors import HasType, PyDoughMetadataException, is_valid_name class GraphMetadata(AbstractMetadata): diff --git a/pydough/metadata/parse.py b/pydough/metadata/parse.py index de44f0067..b42389d45 100644 --- a/pydough/metadata/parse.py +++ b/pydough/metadata/parse.py @@ -6,18 +6,19 @@ import json -from .collections import CollectionMetadata, SimpleTableMetadata -from .errors import ( +from pydough.errors import PyDoughMetadataException +from pydough.errors.error_utils import ( HasPropertyWith, HasType, NoExtraKeys, - PyDoughMetadataException, extract_array, extract_bool, extract_string, is_json_object, is_string, ) + +from .collections import CollectionMetadata, SimpleTableMetadata from .graphs import GraphMetadata from .properties import ( CartesianProductMetadata, diff --git a/pydough/metadata/properties/cartesian_product_metadata.py b/pydough/metadata/properties/cartesian_product_metadata.py index cd2568f09..7f8fc45c2 100644 --- a/pydough/metadata/properties/cartesian_product_metadata.py +++ b/pydough/metadata/properties/cartesian_product_metadata.py @@ -6,14 +6,14 @@ __all__ = ["CartesianProductMetadata"] -from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import ( +from pydough.errors.error_utils import ( NoExtraKeys, extract_array, extract_bool, extract_object, extract_string, ) +from pydough.metadata.collections import CollectionMetadata from pydough.metadata.graphs import GraphMetadata from .property_metadata import PropertyMetadata diff --git a/pydough/metadata/properties/general_join_metadata.py b/pydough/metadata/properties/general_join_metadata.py index b5eac06f8..1bbeb745b 100644 --- a/pydough/metadata/properties/general_join_metadata.py +++ b/pydough/metadata/properties/general_join_metadata.py @@ -6,14 +6,14 
@@ __all__ = ["GeneralJoinMetadata"] -from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import ( +from pydough.errors.error_utils import ( NoExtraKeys, extract_array, extract_bool, extract_object, extract_string, ) +from pydough.metadata.collections import CollectionMetadata from pydough.metadata.graphs import GraphMetadata from .property_metadata import PropertyMetadata diff --git a/pydough/metadata/properties/property_metadata.py b/pydough/metadata/properties/property_metadata.py index 2ccc3010c..3baf90e69 100644 --- a/pydough/metadata/properties/property_metadata.py +++ b/pydough/metadata/properties/property_metadata.py @@ -6,12 +6,12 @@ from abc import abstractmethod -from pydough.metadata.abstract_metadata import AbstractMetadata -from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import ( +from pydough.errors.error_utils import ( HasType, is_valid_name, ) +from pydough.metadata.abstract_metadata import AbstractMetadata +from pydough.metadata.collections import CollectionMetadata class PropertyMetadata(AbstractMetadata): diff --git a/pydough/metadata/properties/scalar_attribute_metadata.py b/pydough/metadata/properties/scalar_attribute_metadata.py index 09857fca2..9de7d0881 100644 --- a/pydough/metadata/properties/scalar_attribute_metadata.py +++ b/pydough/metadata/properties/scalar_attribute_metadata.py @@ -7,8 +7,8 @@ from abc import abstractmethod +from pydough.errors.error_utils import HasType from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import HasType from pydough.types import PyDoughType from .property_metadata import PropertyMetadata diff --git a/pydough/metadata/properties/simple_join_metadata.py b/pydough/metadata/properties/simple_join_metadata.py index e1be9ddca..3e267ddfd 100644 --- a/pydough/metadata/properties/simple_join_metadata.py +++ b/pydough/metadata/properties/simple_join_metadata.py @@ -6,17 +6,17 @@ __all__ = 
["SimpleJoinMetadata"] -from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import ( +from pydough.errors import PyDoughMetadataException +from pydough.errors.error_utils import ( HasPropertyWith, NoExtraKeys, - PyDoughMetadataException, extract_array, extract_bool, extract_object, extract_string, simple_join_keys_predicate, ) +from pydough.metadata.collections import CollectionMetadata from pydough.metadata.graphs import GraphMetadata from .property_metadata import PropertyMetadata diff --git a/pydough/metadata/properties/subcollection_relationship_metadata.py b/pydough/metadata/properties/subcollection_relationship_metadata.py index cf6d3a5fc..c5683a87b 100644 --- a/pydough/metadata/properties/subcollection_relationship_metadata.py +++ b/pydough/metadata/properties/subcollection_relationship_metadata.py @@ -7,8 +7,8 @@ from abc import abstractmethod +from pydough.errors.error_utils import HasType, is_bool from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import HasType, is_bool from .property_metadata import PropertyMetadata diff --git a/pydough/metadata/properties/table_column_metadata.py b/pydough/metadata/properties/table_column_metadata.py index 3b4c1e5a3..d2f234a0e 100644 --- a/pydough/metadata/properties/table_column_metadata.py +++ b/pydough/metadata/properties/table_column_metadata.py @@ -6,17 +6,16 @@ __all__ = ["TableColumnMetadata"] -from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import ( +from pydough.errors import PyDoughMetadataException, PyDoughTypeException +from pydough.errors.error_utils import ( NoExtraKeys, - PyDoughMetadataException, extract_array, extract_object, extract_string, is_string, ) +from pydough.metadata.collections import CollectionMetadata from pydough.types import PyDoughType, parse_type_from_string -from pydough.types.errors import PyDoughTypeException from .property_metadata import PropertyMetadata from 
.scalar_attribute_metadata import ScalarAttributeMetadata diff --git a/pydough/pydough_operators/base_operator.py b/pydough/pydough_operators/base_operator.py index 82ccfacf9..a7750c1a3 100644 --- a/pydough/pydough_operators/base_operator.py +++ b/pydough/pydough_operators/base_operator.py @@ -7,6 +7,8 @@ from abc import abstractmethod from typing import Any +from pydough.errors import PyDoughQDAGException + from .type_inference import TypeVerifier @@ -55,7 +57,6 @@ def verify_allows_args(self, args: list[Any]) -> None: `PyDoughQDAGException` if the operator does not accept the provided arguments. """ - from pydough.qdag.errors import PyDoughQDAGException try: self.verifier.accepts(args) diff --git a/pydough/pydough_operators/operator_registry.py b/pydough/pydough_operators/operator_registry.py index 9ea52ae85..2042bcaf7 100644 --- a/pydough/pydough_operators/operator_registry.py +++ b/pydough/pydough_operators/operator_registry.py @@ -6,6 +6,8 @@ import inspect +from pydough.errors import PyDoughUnqualifiedException + from .base_operator import PyDoughOperator from .expression_operators import ( ExpressionFunctionOperator, @@ -52,7 +54,6 @@ def get_operator_by_name(name: str, **kwargs) -> ExpressionFunctionOperator: keyword arguments, or if keyword arguments are provided for an operator that does not support them. 
""" - from pydough.unqualified import PyDoughUnqualifiedException # Find the operator directly using inspect for op_name, obj in inspect.getmembers(REP): diff --git a/pydough/pydough_operators/type_inference/type_verifier.py b/pydough/pydough_operators/type_inference/type_verifier.py index 453d361d9..59c51c960 100644 --- a/pydough/pydough_operators/type_inference/type_verifier.py +++ b/pydough/pydough_operators/type_inference/type_verifier.py @@ -14,6 +14,8 @@ from abc import ABC, abstractmethod from typing import Any +from pydough.errors import PyDoughQDAGException + class TypeVerifier(ABC): """ @@ -71,8 +73,6 @@ def num_args(self) -> int: return self._num_args def accepts(self, args: list[Any], error_on_fail: bool = True) -> bool: - from pydough.qdag.errors import PyDoughQDAGException - if len(args) != self.num_args: if error_on_fail: suffix = "argument" if self._num_args == 1 else "arguments" @@ -137,8 +137,6 @@ def high_range(self) -> int: return self._high_range def accepts(self, args: list[Any], error_on_fail: bool = True) -> bool: - from pydough.qdag.errors import PyDoughQDAGException - if not (self.low_range <= len(args) <= self.high_range): if error_on_fail: raise PyDoughQDAGException( @@ -157,7 +155,6 @@ class RequireCollection(TypeVerifier): def accepts(self, args: list[Any], error_on_fail: bool = True) -> bool: from pydough.qdag.collections import PyDoughCollectionQDAG - from pydough.qdag.errors import PyDoughQDAGException if len(args) != 1: if error_on_fail: diff --git a/pydough/qdag/__init__.py b/pydough/qdag/__init__.py index 673113ddc..809e7f1e0 100644 --- a/pydough/qdag/__init__.py +++ b/pydough/qdag/__init__.py @@ -37,6 +37,8 @@ "WindowCall", ] +from pydough.errors import PyDoughQDAGException + from .abstract_pydough_qdag import PyDoughQDAG from .collections import ( Calculate, @@ -56,7 +58,6 @@ TopK, Where, ) -from .errors import PyDoughQDAGException from .expressions import ( BackReferenceExpression, ChildReferenceExpression, diff --git 
a/pydough/qdag/collections/calculate.py b/pydough/qdag/collections/calculate.py index eef11e9b4..7d6f6cfd9 100644 --- a/pydough/qdag/collections/calculate.py +++ b/pydough/qdag/collections/calculate.py @@ -9,8 +9,8 @@ from functools import cache +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.qdag.expressions import ( BackReferenceExpression, PyDoughExpressionQDAG, diff --git a/pydough/qdag/collections/collection_access.py b/pydough/qdag/collections/collection_access.py index 80ac8f40c..7b0b06807 100644 --- a/pydough/qdag/collections/collection_access.py +++ b/pydough/qdag/collections/collection_access.py @@ -8,6 +8,7 @@ from functools import cache +from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, PropertyMetadata, @@ -16,7 +17,6 @@ ) from pydough.metadata.properties import SubcollectionRelationshipMetadata from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.qdag.expressions import ( BackReferenceExpression, CollationExpression, diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index aad57fc8d..c91634d62 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -13,8 +13,8 @@ import numpy as np +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.qdag.expressions.collation_expression import CollationExpression from pydough.qdag.expressions.expression_qdag import PyDoughExpressionQDAG diff --git a/pydough/qdag/collections/global_context.py b/pydough/qdag/collections/global_context.py index ceac79ac0..0be884f0b 100644 --- a/pydough/qdag/collections/global_context.py +++ 
b/pydough/qdag/collections/global_context.py @@ -7,12 +7,12 @@ __all__ = ["TableCollection"] +from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, GraphMetadata, ) from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.qdag.expressions import CollationExpression from .collection_qdag import PyDoughCollectionQDAG diff --git a/pydough/qdag/collections/order_by.py b/pydough/qdag/collections/order_by.py index b02ab8f90..0322f5310 100644 --- a/pydough/qdag/collections/order_by.py +++ b/pydough/qdag/collections/order_by.py @@ -8,7 +8,7 @@ from functools import cache -from pydough.qdag.errors import PyDoughQDAGException +from pydough.errors import PyDoughQDAGException from pydough.qdag.expressions import CollationExpression from pydough.qdag.has_hasnot_rewrite import has_hasnot_rewrite diff --git a/pydough/qdag/collections/partition_by.py b/pydough/qdag/collections/partition_by.py index a2eb7477a..c2554b83f 100644 --- a/pydough/qdag/collections/partition_by.py +++ b/pydough/qdag/collections/partition_by.py @@ -9,8 +9,8 @@ from functools import cache +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.qdag.expressions import ( BackReferenceExpression, ChildReferenceExpression, diff --git a/pydough/qdag/collections/partition_child.py b/pydough/qdag/collections/partition_child.py index e0609e658..138dbcf3e 100644 --- a/pydough/qdag/collections/partition_child.py +++ b/pydough/qdag/collections/partition_child.py @@ -8,7 +8,7 @@ from functools import cache -from pydough.qdag.errors import PyDoughQDAGException +from pydough.errors import PyDoughQDAGException from pydough.qdag.expressions import ( BackReferenceExpression, CollationExpression, diff --git a/pydough/qdag/collections/where.py b/pydough/qdag/collections/where.py index 
1c790a06d..85d81bb69 100644 --- a/pydough/qdag/collections/where.py +++ b/pydough/qdag/collections/where.py @@ -8,7 +8,7 @@ from functools import cache -from pydough.qdag.errors import PyDoughQDAGException +from pydough.errors import PyDoughQDAGException from pydough.qdag.expressions import PyDoughExpressionQDAG from pydough.qdag.has_hasnot_rewrite import has_hasnot_rewrite diff --git a/pydough/qdag/errors.py b/pydough/qdag/errors.py deleted file mode 100644 index f270f4f4e..000000000 --- a/pydough/qdag/errors.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -Definitions of the exception type used in the PyDough QDAG module. -""" - -__all__ = ["PyDoughQDAGException"] - - -class PyDoughQDAGException(Exception): - """Exception raised when there is an error relating to a PyDough QDAG, such - as malformed arguments/structure. - """ diff --git a/pydough/qdag/expressions/back_reference_expression.py b/pydough/qdag/expressions/back_reference_expression.py index 70498908c..e887541ec 100644 --- a/pydough/qdag/expressions/back_reference_expression.py +++ b/pydough/qdag/expressions/back_reference_expression.py @@ -4,8 +4,8 @@ """ __all__ = ["BackReferenceExpression"] +from pydough.errors import PyDoughQDAGException from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.types import PyDoughType from .expression_qdag import PyDoughExpressionQDAG diff --git a/pydough/qdag/expressions/child_reference_expression.py b/pydough/qdag/expressions/child_reference_expression.py index 5b1b7d15f..aeaa9e0de 100644 --- a/pydough/qdag/expressions/child_reference_expression.py +++ b/pydough/qdag/expressions/child_reference_expression.py @@ -8,9 +8,9 @@ from functools import cache +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG -from pydough.qdag.errors import PyDoughQDAGException from 
.expression_qdag import PyDoughExpressionQDAG from .reference import Reference diff --git a/pydough/qdag/expressions/column_property.py b/pydough/qdag/expressions/column_property.py index 39d77d2e4..5756f7f13 100644 --- a/pydough/qdag/expressions/column_property.py +++ b/pydough/qdag/expressions/column_property.py @@ -5,9 +5,9 @@ __all__ = ["ColumnProperty"] +from pydough.errors import PyDoughQDAGException from pydough.metadata.properties import TableColumnMetadata from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.types import PyDoughType from .expression_qdag import PyDoughExpressionQDAG diff --git a/pydough/qdag/expressions/reference.py b/pydough/qdag/expressions/reference.py index 5cc95597f..ebbb5c468 100644 --- a/pydough/qdag/expressions/reference.py +++ b/pydough/qdag/expressions/reference.py @@ -6,9 +6,9 @@ __all__ = ["Reference"] +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.types import PyDoughType from .expression_qdag import PyDoughExpressionQDAG diff --git a/pydough/qdag/expressions/sided_reference.py b/pydough/qdag/expressions/sided_reference.py index 6b9885d23..c8b150fcb 100644 --- a/pydough/qdag/expressions/sided_reference.py +++ b/pydough/qdag/expressions/sided_reference.py @@ -6,9 +6,9 @@ __all__ = ["SidedReference"] +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.types import PyDoughType from .expression_qdag import PyDoughExpressionQDAG diff --git a/pydough/qdag/node_builder.py b/pydough/qdag/node_builder.py index 3bfd3e829..d20b26ad2 100644 --- 
a/pydough/qdag/node_builder.py +++ b/pydough/qdag/node_builder.py @@ -4,12 +4,11 @@ __all__ = ["AstNodeBuilder"] - +from pydough.errors import PyDoughMetadataException, PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, GraphMetadata, PropertyMetadata, - PyDoughMetadataException, TableColumnMetadata, ) from pydough.pydough_operators import ( @@ -33,7 +32,6 @@ TopK, Where, ) -from .errors import PyDoughQDAGException from .expressions import ( BackReferenceExpression, ChildReferenceExpression, diff --git a/pydough/types/array_type.py b/pydough/types/array_type.py index 445349e97..03da236cb 100644 --- a/pydough/types/array_type.py +++ b/pydough/types/array_type.py @@ -6,7 +6,8 @@ import re -from .errors import PyDoughTypeException +from pydough.errors import PyDoughTypeException + from .pydough_type import PyDoughType diff --git a/pydough/types/errors.py b/pydough/types/errors.py deleted file mode 100644 index c360c18cc..000000000 --- a/pydough/types/errors.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -Error-handling definitions for the types module. -""" - -__all__ = ["PyDoughTypeException"] - - -class PyDoughTypeException(Exception): - """Exception raised when there is an error relating to PyDough types, such - as malformed inputs to a parametrized type or a string that cannot be - parsed into a type. 
- """ diff --git a/pydough/types/map_type.py b/pydough/types/map_type.py index aaf6a104a..6dd0480f7 100644 --- a/pydough/types/map_type.py +++ b/pydough/types/map_type.py @@ -6,7 +6,8 @@ import re -from .errors import PyDoughTypeException +from pydough.errors import PyDoughTypeException + from .pydough_type import PyDoughType diff --git a/pydough/types/parse_types.py b/pydough/types/parse_types.py index bcc741ff2..fa85e0734 100644 --- a/pydough/types/parse_types.py +++ b/pydough/types/parse_types.py @@ -5,10 +5,11 @@ __all__ = ["parse_type_from_string"] +from pydough.errors import PyDoughTypeException + from .array_type import ArrayType from .boolean_type import BooleanType from .datetime_type import DatetimeType -from .errors import PyDoughTypeException from .map_type import MapType from .numeric_type import NumericType from .pydough_type import PyDoughType diff --git a/pydough/types/struct_type.py b/pydough/types/struct_type.py index 95aba8152..7e7564bc1 100644 --- a/pydough/types/struct_type.py +++ b/pydough/types/struct_type.py @@ -6,7 +6,8 @@ import re -from .errors import PyDoughTypeException +from pydough.errors import PyDoughTypeException + from .pydough_type import PyDoughType diff --git a/pydough/unqualified/__init__.py b/pydough/unqualified/__init__.py index 39522204b..3219f1869 100644 --- a/pydough/unqualified/__init__.py +++ b/pydough/unqualified/__init__.py @@ -5,7 +5,6 @@ """ __all__ = [ - "PyDoughUnqualifiedException", "UnqualifiedAccess", "UnqualifiedBinaryOperation", "UnqualifiedCalculate", @@ -27,7 +26,6 @@ "transform_code", ] -from .errors import PyDoughUnqualifiedException from .qualification import qualify_node, qualify_term from .unqualified_node import ( UnqualifiedAccess, diff --git a/pydough/unqualified/errors.py b/pydough/unqualified/errors.py deleted file mode 100644 index 261cea209..000000000 --- a/pydough/unqualified/errors.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -Error handling definitions used for the unqualified module. 
-""" - -__all__ = ["PyDoughUnqualifiedException"] - - -class PyDoughUnqualifiedException(Exception): - """Exception raised when there is an error relating to the PyDough - unqualified form, such as a Python object that cannot be coerced. - """ diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index bfdaa4e6e..9d05dcca8 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -9,6 +9,7 @@ import pydough from pydough.configs import PyDoughConfigs +from pydough.errors import PyDoughUnqualifiedException from pydough.metadata import GeneralJoinMetadata, GraphMetadata from pydough.pydough_operators import get_operator_by_name from pydough.pydough_operators.expression_operators import ( @@ -40,7 +41,6 @@ ) from pydough.types import PyDoughType -from .errors import PyDoughUnqualifiedException from .unqualified_node import ( UnqualifiedAccess, UnqualifiedBest, diff --git a/pydough/unqualified/unqualified_node.py b/pydough/unqualified/unqualified_node.py index 322a7ac2e..58256074e 100644 --- a/pydough/unqualified/unqualified_node.py +++ b/pydough/unqualified/unqualified_node.py @@ -27,8 +27,9 @@ from typing import Any, Union import pydough.pydough_operators as pydop +from pydough.errors import PyDoughUnqualifiedException +from pydough.errors.error_utils import is_bool, is_integer, is_positive_int, is_string from pydough.metadata import GraphMetadata -from pydough.metadata.errors import is_bool, is_integer, is_positive_int, is_string from pydough.pydough_operators import get_operator_by_name from pydough.types import ( ArrayType, @@ -40,8 +41,6 @@ UnknownType, ) -from .errors import PyDoughUnqualifiedException - class UnqualifiedNode(ABC): """ diff --git a/tests/test_metadata_errors.py b/tests/test_metadata_errors.py index 29c78c6a4..40ed05617 100644 --- a/tests/test_metadata_errors.py +++ b/tests/test_metadata_errors.py @@ -9,7 +9,8 @@ from pydough import parse_json_metadata_from_file from 
pydough.configs import PyDoughConfigs -from pydough.metadata import CollectionMetadata, GraphMetadata, PyDoughMetadataException +from pydough.errors import PyDoughMetadataException +from pydough.metadata import CollectionMetadata, GraphMetadata from pydough.unqualified import UnqualifiedNode, qualify_node, transform_code from tests.testing_utilities import graph_fetcher From 87284872bcc32fd65dc135110868569cd93d11c7 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 8 Jul 2025 10:38:52 -0400 Subject: [PATCH 002/143] Changed more errors to be PyDough exceptions --- pydough/configs/pydough_configs.py | 4 +- pydough/conversion/hybrid_translator.py | 3 +- pydough/conversion/hybrid_tree.py | 2 +- .../database_connectors/builtin_databases.py | 6 ++- .../database_connectors/database_connector.py | 9 ++-- .../database_connectors/empty_connection.py | 10 +++-- pydough/errors/__init__.py | 8 ++++ pydough/errors/error_types.py | 41 +++++++++++++++++-- pydough/evaluation/evaluate_unqualified.py | 14 ++++--- pydough/exploration/explain.py | 8 ++-- pydough/jupyter_extensions/pydough_magic.py | 3 +- .../expression_operators/binary_operators.py | 3 +- .../qdag/collections/collection_tree_form.py | 6 ++- pydough/sqlglot/execute_relational.py | 10 ++--- .../sqlglot_relational_expression_visitor.py | 5 ++- .../base_transform_bindings.py | 41 ++++++++++--------- .../sqlglot_transform_utils.py | 26 ++++++++---- .../sqlite_transform_bindings.py | 11 +++-- pydough/unqualified/unqualified_node.py | 2 +- tests/conftest.py | 3 +- tests/test_documentation.py | 3 +- tests/test_sqlite_connection.py | 9 ++-- tests/testing_utilities.py | 21 +++++++--- 23 files changed, 168 insertions(+), 80 deletions(-) diff --git a/pydough/configs/pydough_configs.py b/pydough/configs/pydough_configs.py index ea93b6de5..0d895cc53 100644 --- a/pydough/configs/pydough_configs.py +++ b/pydough/configs/pydough_configs.py @@ -7,6 +7,8 @@ from enum import Enum from typing import Any, Generic, TypeVar +from 
pydough.errors import PyDoughSessionException + T = TypeVar("T") @@ -126,5 +128,5 @@ class PyDoughConfigs: def __setattr__(self, name: str, value: Any) -> None: if name not in dir(self): - raise AttributeError(f"Unrecognized PyDough config name: {name}") + raise PyDoughSessionException(f"Unrecognized PyDough config name: {name}") super().__setattr__(name, value) diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index b376ce944..18e4e8de3 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -9,6 +9,7 @@ import pydough.pydough_operators as pydop from pydough.configs import PyDoughConfigs from pydough.database_connectors import DatabaseDialect +from pydough.errors import PyDoughSQLException from pydough.metadata import ( CartesianProductMetadata, GeneralJoinMetadata, @@ -806,7 +807,7 @@ def rewrite_quantile_call( or not isinstance(expr.args[1].literal.value, (int, float)) or not (0.0 <= float(expr.args[1].literal.value) <= 1.0) ): - raise ValueError( + raise PyDoughSQLException( f"Expected second argument to QUANTILE to be a numeric literal between 0 and 1, instead received {expr.args[1]!r}" ) diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index 9a52f3e50..749eb3d71 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -605,7 +605,7 @@ def add_successor(self, successor: "HybridTree") -> None: `successor`: the HybridTree to be marked as one level below `self`. 
""" if self._successor is not None: - raise Exception("Duplicate successor") + raise ValueError("Duplicate successor") self._successor = successor successor._parent = self # Shift the aggregation keys and rhs of join keys back by 1 level to diff --git a/pydough/database_connectors/builtin_databases.py b/pydough/database_connectors/builtin_databases.py index 1fc483db4..29069b908 100644 --- a/pydough/database_connectors/builtin_databases.py +++ b/pydough/database_connectors/builtin_databases.py @@ -5,6 +5,8 @@ import sqlite3 +from pydough.errors import PyDoughSessionException + from .database_connector import DatabaseConnection, DatabaseContext, DatabaseDialect __all__ = ["load_database_context", "load_sqlite_connection"] @@ -31,7 +33,7 @@ def load_database_context(database_name: str, **kwargs) -> DatabaseContext: connection = load_sqlite_connection(**kwargs) dialect = DatabaseDialect.SQLITE case _: - raise ValueError( + raise PyDoughSessionException( f"Unsupported database: {database_name}. The supported databases are: {supported_databases}." "Any other database must be created manually by specifying the connection and dialect." ) @@ -47,6 +49,6 @@ def load_sqlite_connection(**kwargs) -> DatabaseConnection: DatabaseConnection: A database connection object for SQLite. """ if "database" not in kwargs: - raise ValueError("SQLite connection requires a database path.") + raise PyDoughSessionException("SQLite connection requires a database path.") connection: sqlite3.Connection = sqlite3.connect(**kwargs) return DatabaseConnection(connection) diff --git a/pydough/database_connectors/database_connector.py b/pydough/database_connectors/database_connector.py index 4a171a37b..aabcb4f7c 100644 --- a/pydough/database_connectors/database_connector.py +++ b/pydough/database_connectors/database_connector.py @@ -3,7 +3,8 @@ by leveraging PEP 249 (Python Database API Specification v2.0). https://peps.python.org/pep-0249/ """ -# Copyright (C) 2024 Bodo Inc. All rights reserved. 
+ +__all__ = ["DatabaseConnection", "DatabaseContext", "DatabaseDialect"] import sqlite3 from dataclasses import dataclass @@ -11,7 +12,7 @@ import pandas as pd -__all__ = ["DatabaseConnection", "DatabaseContext", "DatabaseDialect"] +from pydough.errors import PyDoughSessionException, PyDoughSQLException class DatabaseConnection: @@ -47,7 +48,7 @@ def execute_query_df(self, sql: str) -> pd.DataFrame: cursor.execute(sql) except sqlite3.OperationalError as e: print(f"ERROR WHILE EXECUTING QUERY:\n{sql}") - raise e + raise PyDoughSQLException(*e.args) from e column_names: list[str] = [description[0] for description in cursor.description] # No need to close the cursor, as its closed by del. # TODO: (gh #174) Cache the cursor? @@ -92,7 +93,7 @@ def from_string(dialect: str) -> "DatabaseDialect": elif dialect == "sqlite": return DatabaseDialect.SQLITE else: - raise ValueError(f"Unsupported dialect: {dialect}") + raise PyDoughSessionException(f"Unsupported dialect: {dialect}") @dataclass diff --git a/pydough/database_connectors/empty_connection.py b/pydough/database_connectors/empty_connection.py index 0629e2542..384c94d65 100644 --- a/pydough/database_connectors/empty_connection.py +++ b/pydough/database_connectors/empty_connection.py @@ -9,6 +9,8 @@ __all__ = ["empty_connection"] +from pydough.errors import PyDoughSessionException + from .database_connector import DatabaseConnection @@ -22,16 +24,16 @@ def __init__(self): pass def commit(self): - raise ValueError("No SQL Database is specified.") + raise PyDoughSessionException("No SQL Database is specified.") def close(self): - raise ValueError("No SQL Database is specified.") + raise PyDoughSessionException("No SQL Database is specified.") def rollback(self): - raise ValueError("No SQL Database is specified.") + raise PyDoughSessionException("No SQL Database is specified.") def cursor(self, *args, **kwargs): - raise ValueError("No SQL Database is specified.") + raise PyDoughSessionException("No SQL Database is 
specified.") empty_connection: DatabaseConnection = DatabaseConnection(EmptyConnection()) diff --git a/pydough/errors/__init__.py b/pydough/errors/__init__.py index 1a89f4ceb..ea9668f25 100644 --- a/pydough/errors/__init__.py +++ b/pydough/errors/__init__.py @@ -3,15 +3,23 @@ """ __all__ = [ + "PyDoughException", "PyDoughMetadataException", "PyDoughQDAGException", + "PyDoughSQLException", + "PyDoughSessionException", + "PyDoughTestingException", "PyDoughTypeException", "PyDoughUnqualifiedException", ] from .error_types import ( + PyDoughException, PyDoughMetadataException, PyDoughQDAGException, + PyDoughSessionException, + PyDoughSQLException, + PyDoughTestingException, PyDoughTypeException, PyDoughUnqualifiedException, ) diff --git a/pydough/errors/error_types.py b/pydough/errors/error_types.py index 5951bb2d0..d0f743259 100644 --- a/pydough/errors/error_types.py +++ b/pydough/errors/error_types.py @@ -3,21 +3,40 @@ """ __all__ = [ + "PyDoughException", "PyDoughMetadataException", "PyDoughQDAGException", + "PyDoughSQLException", + "PyDoughSessionException", + "PyDoughTestingException", "PyDoughTypeException", "PyDoughUnqualifiedException", ] -class PyDoughMetadataException(Exception): +class PyDoughException(Exception): + """ + Base class for all PyDough exceptions. + """ + + +class PyDoughSessionException(PyDoughException): + """ + Exception raised when something goes wrong with the PyDough session or + configs, such as assigning to a configuration that does not exist, or + not mounting a graph or database to the session when they are needed, + or issues with the setup of the database. + """ + + +class PyDoughMetadataException(PyDoughException): """ Exception raised when there is an error relating to PyDough metadata, such as an error while parsing/validating the JSON or an ill-formed pattern.
""" -class PyDoughUnqualifiedException(Exception): +class PyDoughUnqualifiedException(PyDoughException): """ Exception raised when there is an error relating to the PyDough unqualified form, such as a Python object that cannot be coerced or an @@ -25,7 +44,7 @@ class PyDoughUnqualifiedException(Exception): """ -class PyDoughQDAGException(Exception): +class PyDoughQDAGException(PyDoughException): """ Exception raised when there is an error relating to a PyDough QDAG, such as malformed arguments/structure, undefined term accesses, singular vs @@ -34,9 +53,23 @@ class PyDoughQDAGException(Exception): """ -class PyDoughTypeException(Exception): +class PyDoughTypeException(PyDoughException): """ Exception raised when there is an error relating to PyDough types, such as malformed inputs to a parametrized type or a string that cannot be parsed into a type. """ + + +class PyDoughSQLException(PyDoughException): + """ + Exception caused by a malformation in the SQL that causes bugs during SQL + generation, SQL rewrites/optimization, or errors during SQL execution. + """ + + +class PyDoughTestingException(PyDoughException): + """ + Exception raised within PyDough testing logic to indicate that something + has gone wrong, e.g. when the AstNodeTestInfo classes are used incorrectly.
+ """ diff --git a/pydough/evaluation/evaluate_unqualified.py b/pydough/evaluation/evaluate_unqualified.py index 0b17c3450..819f44882 100644 --- a/pydough/evaluation/evaluate_unqualified.py +++ b/pydough/evaluation/evaluate_unqualified.py @@ -12,6 +12,10 @@ from pydough.configs import PyDoughConfigs from pydough.conversion import convert_ast_to_relational from pydough.database_connectors import DatabaseContext +from pydough.errors import ( + PyDoughQDAGException, + PyDoughSessionException, +) from pydough.metadata import GraphMetadata from pydough.qdag import PyDoughCollectionQDAG, PyDoughQDAG from pydough.relational import RelationalRoot @@ -42,7 +46,7 @@ def _load_session_info( metadata = kwargs.pop("metadata") else: if pydough.active_session.metadata is None: - raise ValueError( + raise PyDoughSessionException( "Cannot evaluate Pydough without a metadata graph. " "Please call `pydough.active_session.load_metadata_graph()`." ) @@ -94,11 +98,11 @@ def _load_column_selection(kwargs: dict[str, object]) -> list[tuple[str, str]] | ) result.append((alias, column)) else: - raise TypeError( + raise PyDoughQDAGException( f"Expected `columns` argument to be a list or dictionary, found {columns_arg.__class__.__name__}" ) if len(result) == 0: - raise ValueError("Column selection must not be empty") + raise PyDoughQDAGException("Column selection must not be empty") return result @@ -124,7 +128,7 @@ def to_sql(node: UnqualifiedNode, **kwargs) -> str: graph, config, database = _load_session_info(**kwargs) qualified: PyDoughQDAG = qualify_node(node, graph, config) if not isinstance(qualified, PyDoughCollectionQDAG): - raise TypeError( + raise PyDoughQDAGException( f"Final qualified expression must be a collection, found {qualified.__class__.__name__}" ) relational: RelationalRoot = convert_ast_to_relational( @@ -157,7 +161,7 @@ def to_df(node: UnqualifiedNode, **kwargs) -> pd.DataFrame: graph, config, database = _load_session_info(**kwargs) qualified: PyDoughQDAG = 
qualify_node(node, graph, config) if not isinstance(qualified, PyDoughCollectionQDAG): - raise TypeError( + raise PyDoughQDAGException( f"Final qualified expression must be a collection, found {qualified.__class__.__name__}" ) relational: RelationalRoot = convert_ast_to_relational( diff --git a/pydough/exploration/explain.py b/pydough/exploration/explain.py index 8e16afa90..045bb1f02 100644 --- a/pydough/exploration/explain.py +++ b/pydough/exploration/explain.py @@ -130,7 +130,7 @@ def explain_property(property: PropertyMetadata, verbose: bool) -> str: for cond_str in conditions: lines.append(f" {cond_str}") case _: - raise ValueError( + raise NotImplementedError( f"Unrecognized type of property: {property.__class__.__name__}" ) else: @@ -138,7 +138,7 @@ def explain_property(property: PropertyMetadata, verbose: bool) -> str: f"Use pydough.explain(graph['{collection_name}']['{property_name}'], verbose=True) to learn more details." ) case _: - raise ValueError( + raise NotImplementedError( f"Unrecognized type of property: {property.__class__.__name__}" ) return "\n".join(lines) @@ -183,7 +183,7 @@ def explain_collection(collection: CollectionMetadata, verbose: bool) -> str: f"Unique properties of collection: {collection.unique_properties}" ) else: - raise ValueError( + raise NotImplementedError( f"Unrecognized type of collection: {collection.__class__.__name__}" ) if len(scalar_properties) == 0: @@ -527,6 +527,6 @@ def explain( case UnqualifiedNode(): return explain_unqualified(data, verbose) case _: - raise ValueError( + raise NotImplementedError( f"Cannot call pydough.explain on argument of type {data.__class__.__name__}" ) diff --git a/pydough/jupyter_extensions/pydough_magic.py b/pydough/jupyter_extensions/pydough_magic.py index e422ebc8b..9b9184118 100644 --- a/pydough/jupyter_extensions/pydough_magic.py +++ b/pydough/jupyter_extensions/pydough_magic.py @@ -10,6 +10,7 @@ ) import pydough +from pydough.errors import PyDoughSessionException from pydough.metadata 
import GraphMetadata from pydough.unqualified import transform_cell @@ -33,7 +34,7 @@ def pydough(self, line="", cell="", local_ns=None): cell = self.shell.var_expand(cell) graph: GraphMetadata | None = pydough.active_session.metadata if graph is None: - raise Exception( + raise PyDoughSessionException( "No active graph set in PyDough session." " Please set a graph using" " pydough.active_session.load_metadata_graph(...)" diff --git a/pydough/pydough_operators/expression_operators/binary_operators.py b/pydough/pydough_operators/expression_operators/binary_operators.py index eeed8319b..2fb74135b 100644 --- a/pydough/pydough_operators/expression_operators/binary_operators.py +++ b/pydough/pydough_operators/expression_operators/binary_operators.py @@ -6,6 +6,7 @@ from enum import Enum +from pydough.errors import PyDoughQDAGException from pydough.pydough_operators.type_inference import ( ExpressionTypeDeducer, TypeVerifier, @@ -43,7 +44,7 @@ def from_string(s: str) -> "BinOp": for op in BinOp.__members__.values(): if s == op.value: return op - raise ValueError(f"Unrecognized operation: {s!r}") + raise PyDoughQDAGException(f"Unrecognized operation: {s!r}") BinOp.__members__.items() diff --git a/pydough/qdag/collections/collection_tree_form.py b/pydough/qdag/collections/collection_tree_form.py index d86ea7f82..be3c533ae 100644 --- a/pydough/qdag/collections/collection_tree_form.py +++ b/pydough/qdag/collections/collection_tree_form.py @@ -8,6 +8,8 @@ from typing import Union +from pydough.errors import PyDoughQDAGException + class CollectionTreeForm: """ @@ -82,7 +84,7 @@ def to_string_rows(self) -> list[str]: case (True, True): answer = [f"{self.ROOT_PARENT_PREDECESSOR} {self.item_str}"] case _: - raise Exception("Malformed collection tree form") + raise PyDoughQDAGException("Malformed collection tree form") else: answer = ( [] if self.predecessor is None else self.predecessor.to_string_rows() @@ -97,7 +99,7 @@ def to_string_rows(self) -> list[str]: case (True, True): 
answer.append(f"{prefix}{self.SUCCESSOR_PARENT} {self.item_str}") case _: - raise Exception("Malformed collection tree form") + raise PyDoughQDAGException("Malformed collection tree form") new_prefix: str = f"{prefix}{self.CHILD_SPACER if self.has_successor else self.PREDECESSOR_SPACER}" for idx, child in enumerate(self.nested_trees): is_last_child: bool = idx == len(self.nested_trees) - 1 diff --git a/pydough/sqlglot/execute_relational.py b/pydough/sqlglot/execute_relational.py index 0b569dad9..ac204f492 100644 --- a/pydough/sqlglot/execute_relational.py +++ b/pydough/sqlglot/execute_relational.py @@ -29,6 +29,7 @@ DatabaseContext, DatabaseDialect, ) +from pydough.errors import PyDoughSQLException from pydough.logger import get_logger from pydough.relational import RelationalRoot from pydough.relational.relational_expressions import ( @@ -66,10 +67,9 @@ def convert_relation_to_sql( try: glot_expr = apply_sqlglot_optimizer(glot_expr, relational, sqlglot_dialect) except SqlglotError as e: - print( - f"ERROR WHILE OPTIMIZING QUERY:\n{glot_expr.sql(sqlglot_dialect, pretty=True)}" - ) - raise e + sql_text: str = glot_expr.sql(sqlglot_dialect, pretty=True) + print(f"ERROR WHILE OPTIMIZING QUERY:\n{sql_text}") + raise PyDoughSQLException(*e.args) # Convert the optimized AST back to a SQL string. 
return glot_expr.sql(sqlglot_dialect, pretty=True) @@ -268,7 +268,7 @@ def convert_dialect_to_sqlglot(dialect: DatabaseDialect) -> SQLGlotDialect: elif dialect == DatabaseDialect.SQLITE: return SQLiteDialect() else: - raise ValueError(f"Unsupported dialect: {dialect}") + raise NotImplementedError(f"Unsupported dialect: {dialect}") def execute_df( diff --git a/pydough/sqlglot/sqlglot_relational_expression_visitor.py b/pydough/sqlglot/sqlglot_relational_expression_visitor.py index 943c4ac48..f27c2c392 100644 --- a/pydough/sqlglot/sqlglot_relational_expression_visitor.py +++ b/pydough/sqlglot/sqlglot_relational_expression_visitor.py @@ -13,6 +13,7 @@ from pydough.configs import PyDoughConfigs from pydough.database_connectors import DatabaseDialect +from pydough.errors import PyDoughSQLException from pydough.relational import ( CallExpression, ColumnReference, @@ -218,7 +219,7 @@ def visit_window_expression(self, window_expression: WindowCallExpression) -> No case "PREV" | "NEXT": offset = window_expression.kwargs.get("n", 1) if not isinstance(offset, int): - raise ValueError( + raise PyDoughSQLException( f"Invalid 'n' argument to {window_expression.op.function_name}: {offset!r} (expected an integer)" ) # By default, we use the LAG function. 
If doing NEXT, switch @@ -288,7 +289,7 @@ def visit_literal_expression(self, literal_expression: LiteralExpression) -> Non if isinstance(literal_expression.value, datetime.datetime): dt: datetime.datetime = literal_expression.value if dt.tzinfo is not None: - raise ValueError( + raise PyDoughSQLException( "PyDough does not yet support datetime values with a timezone" ) literal = sqlglot_expressions.Cast( diff --git a/pydough/sqlglot/transform_bindings/base_transform_bindings.py b/pydough/sqlglot/transform_bindings/base_transform_bindings.py index 34dd64be3..eef8faa80 100644 --- a/pydough/sqlglot/transform_bindings/base_transform_bindings.py +++ b/pydough/sqlglot/transform_bindings/base_transform_bindings.py @@ -13,6 +13,7 @@ import pydough.pydough_operators as pydop from pydough.configs import DayOfWeek, PyDoughConfigs +from pydough.errors import PyDoughSQLException from pydough.types import BooleanType, NumericType, PyDoughType, StringType from .sqlglot_transform_utils import ( @@ -575,11 +576,11 @@ def convert_slice( try: start_idx = int(start.this) except ValueError: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the start index being integer literal or absent." ) else: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the start index being integer literal or absent." ) @@ -589,11 +590,11 @@ def convert_slice( try: stop_idx = int(stop.this) except ValueError: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the stop index being integer literal or absent." ) else: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the stop index being integer literal or absent." ) @@ -603,15 +604,15 @@ def convert_slice( try: step_idx = int(step.this) if step_idx != 1: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the step being integer literal 1 or absent." 
) except ValueError: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the step being integer literal 1 or absent." ) else: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the step being integer literal 1 or absent." ) @@ -622,7 +623,7 @@ def convert_slice( match (start_idx, stop_idx): case (None, None): - raise string_expr + return string_expr case (_, None): assert start_idx is not None if start_idx > 0: @@ -1207,14 +1208,14 @@ def convert_round( not isinstance(args[1], sqlglot_expressions.Literal) or args[1].is_string ): - raise ValueError( + raise PyDoughSQLException( f"Unsupported argument {args[1]} for ROUND." "The precision argument should be an integer literal." ) try: int(args[1].this) except ValueError: - raise ValueError( + raise PyDoughSQLException( f"Unsupported argument {args[1]} for ROUND." "The precision argument should be an integer literal." ) @@ -1281,14 +1282,14 @@ def convert_datediff( assert len(args) == 3 # Check if unit is a string. if not (isinstance(args[0], sqlglot_expressions.Literal) and args[0].is_string): - raise ValueError( + raise PyDoughSQLException( f"Unsupported argument for DATEDIFF: {args[0]!r}. It should be a string literal." ) x = self.make_datetime_arg(args[1]) y = self.make_datetime_arg(args[2]) unit: DateTimeUnit | None = DateTimeUnit.from_string(args[0].this) if unit is None: - raise ValueError(f"Unsupported argument '{unit}' for DATEDIFF.") + raise PyDoughSQLException(f"Unsupported argument '{unit}' for DATEDIFF.") answer = sqlglot_expressions.DateDiff( unit=sqlglot_expressions.Var(this=unit.value), this=y, expression=x ) @@ -1407,7 +1408,7 @@ def convert_datetime( # truncation. 
unit = DateTimeUnit.from_string(str(trunc_match.group(1))) if unit is None: - raise ValueError( + raise PyDoughSQLException( f"Unsupported DATETIME modifier string: {arg.this!r}" ) result = self.apply_datetime_truncation(result, unit) @@ -1419,12 +1420,14 @@ def convert_datetime( amt *= -1 unit = DateTimeUnit.from_string(str(offset_match.group(3))) if unit is None: - raise ValueError( + raise PyDoughSQLException( f"Unsupported DATETIME modifier string: {arg.this!r}" ) result = self.apply_datetime_offset(result, amt, unit) else: - raise ValueError(f"Unsupported DATETIME modifier string: {arg.this!r}") + raise PyDoughSQLException( + f"Unsupported DATETIME modifier string: {arg.this!r}" + ) return result def convert_extract_datetime( @@ -1618,7 +1621,7 @@ def convert_string( not isinstance(args[1], sqlglot_expressions.Literal) or not args[1].is_string ): - raise ValueError( + raise PyDoughSQLException( f"STRING(X,Y) requires the second argument to be a string date format literal, but received {args[1]}" ) return sqlglot_expressions.TimeToStr(this=args[0], format=args[1]) @@ -1730,7 +1733,7 @@ def convert_count( elif len(args) == 1: return sqlglot_expressions.Count(this=args[0]) else: - raise ValueError(f"COUNT expects 0 or 1 argument, got {len(args)}") + raise PyDoughSQLException(f"COUNT expects 0 or 1 argument, got {len(args)}") def convert_quantile( self, args: list[SQLGlotExpression], types: list[PyDoughType] @@ -1762,8 +1765,8 @@ def convert_quantile( or args[1].is_string or not (0.0 <= float(args[1].this) <= 1.0) ): - raise ValueError( - f"QUANTILE TEST argument to be a numeric literal between 0 and 1, got {args[1]}" + raise PyDoughSQLException( + f"QUANTILE expected second argument to be a numeric literal between 0 and 1, got {args[1]}" ) percentile_disc_function: SQLGlotExpression = ( diff --git a/pydough/sqlglot/transform_bindings/sqlglot_transform_utils.py b/pydough/sqlglot/transform_bindings/sqlglot_transform_utils.py index f1c74f08a..bce06a33f 100644 --- 
a/pydough/sqlglot/transform_bindings/sqlglot_transform_utils.py +++ b/pydough/sqlglot/transform_bindings/sqlglot_transform_utils.py @@ -21,6 +21,8 @@ from sqlglot.expressions import Binary, Case, Concat, Is, Paren, Unary from sqlglot.expressions import Expression as SQLGlotExpression +from pydough.errors import PyDoughSQLException + PAREN_EXPRESSIONS = (Binary, Unary, Concat, Is, Case) """ The types of SQLGlot expressions that need to be wrapped in parenthesis for the @@ -165,11 +167,15 @@ def truncation_string(self) -> str: case DateTimeUnit.YEAR: return "'%Y-01-01 00:00:00'" case DateTimeUnit.QUARTER: - raise ValueError("Quarter unit does not have a truncation string.") + raise PyDoughSQLException( + "Quarter unit does not have a truncation string." + ) case DateTimeUnit.MONTH: return "'%Y-%m-01 00:00:00'" case DateTimeUnit.WEEK: - raise ValueError("Week unit does not have a truncation string.") + raise PyDoughSQLException( + "Week unit does not have a truncation string." + ) case DateTimeUnit.DAY: return "'%Y-%m-%d 00:00:00'" case DateTimeUnit.HOUR: @@ -188,7 +194,9 @@ def extraction_string(self) -> str: case DateTimeUnit.YEAR: return "'%Y'" case DateTimeUnit.QUARTER: - raise ValueError("Quarter unit does not have an extraction string.") + raise PyDoughSQLException( + "Quarter unit does not have an extraction string." + ) case DateTimeUnit.MONTH: return "'%m'" case DateTimeUnit.WEEK: @@ -257,22 +265,24 @@ def pad_helper( try: required_len = int(args[1].this) if required_len < 0: - raise ValueError() + raise PyDoughSQLException( + f"{pad_func} function requires the length argument to be a non-negative integer literal." + ) except ValueError: - raise ValueError( + raise PyDoughSQLException( f"{pad_func} function requires the length argument to be a non-negative integer literal." ) else: - raise ValueError( + raise PyDoughSQLException( f"{pad_func} function requires the length argument to be a non-negative integer literal." 
) if not isinstance(args[2], sqlglot_expressions.Literal) or not args[2].is_string: - raise ValueError( + raise PyDoughSQLException( f"{pad_func} function requires the padding argument to be a string literal of length 1." ) if len(str(args[2].this)) != 1: - raise ValueError( + raise PyDoughSQLException( f"{pad_func} function requires the padding argument to be a string literal of length 1." ) diff --git a/pydough/sqlglot/transform_bindings/sqlite_transform_bindings.py b/pydough/sqlglot/transform_bindings/sqlite_transform_bindings.py index c3de15b5a..402030443 100644 --- a/pydough/sqlglot/transform_bindings/sqlite_transform_bindings.py +++ b/pydough/sqlglot/transform_bindings/sqlite_transform_bindings.py @@ -10,6 +10,7 @@ from sqlglot.expressions import Expression as SQLGlotExpression import pydough.pydough_operators as pydop +from pydough.errors import PyDoughSQLException from pydough.types import DatetimeType, NumericType, PyDoughType, StringType from .base_transform_bindings import BaseTransformBindings @@ -84,11 +85,11 @@ def convert_datediff( ) -> SQLGlotExpression: assert len(args) == 3 if not isinstance(args[0], sqlglot_expressions.Literal): - raise ValueError( + raise PyDoughSQLException( f"Unsupported argument {args[0]} for DATEDIFF.It should be a string." ) elif not args[0].is_string: - raise ValueError( + raise PyDoughSQLException( f"Unsupported argument {args[0]} for DATEDIFF.It should be a string." ) unit: DateTimeUnit | None = DateTimeUnit.from_string(args[0].this) @@ -291,7 +292,9 @@ def convert_datediff( ) return secs_diff case _: - raise ValueError(f"Unsupported argument '{unit}' for DATEDIFF.") + raise PyDoughSQLException( + f"Unsupported argument '{unit}' for DATEDIFF." 
+ ) def convert_quarter( self, @@ -580,7 +583,7 @@ def convert_variance( ) ) else: - raise ValueError(f"Unsupported type: {type}") + raise PyDoughSQLException(f"Unsupported typ for variance/std: {type}") def convert_std( self, args: list[SQLGlotExpression], types: list[PyDoughType], type: str diff --git a/pydough/unqualified/unqualified_node.py b/pydough/unqualified/unqualified_node.py index 58256074e..bf41f0e6c 100644 --- a/pydough/unqualified/unqualified_node.py +++ b/pydough/unqualified/unqualified_node.py @@ -108,7 +108,7 @@ def __setattr__(self, name: str, value: object) -> None: super().__setattr__(name, value) else: # TODO: support using setattr to add/mutate properties. - raise AttributeError( + raise PyDoughUnqualifiedException( "PyDough objects do not yet support writing properties to them." ) diff --git a/tests/conftest.py b/tests/conftest.py index b9a7a6fb3..95a050e2c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,6 +20,7 @@ DatabaseDialect, empty_connection, ) +from pydough.errors import PyDoughTestingException from pydough.metadata.graphs import GraphMetadata from pydough.qdag import AstNodeBuilder from tests.testing_utilities import graph_fetcher @@ -121,7 +122,7 @@ def get_sample_graph( @cache def impl(name: str) -> GraphMetadata: if name not in valid_sample_graph_names: - raise Exception(f"Unrecognized graph name '{name}'") + raise PyDoughTestingException(f"Unrecognized graph name '{name}'") return pydough.parse_json_metadata_from_file( file_path=sample_graph_path, graph_name=name ) diff --git a/tests/test_documentation.py b/tests/test_documentation.py index 83fc7b113..6265f4c47 100644 --- a/tests/test_documentation.py +++ b/tests/test_documentation.py @@ -3,6 +3,7 @@ """ import pydough.pydough_operators as pydop +from pydough.errors import PyDoughTestingException def test_function_list(): @@ -34,6 +35,6 @@ def test_function_list(): # any that remain function_names.difference_update(headers) if function_names: - raise Exception( + 
raise PyDoughTestingException( "The following functions are not documented: " + ", ".join(function_names) ) diff --git a/tests/test_sqlite_connection.py b/tests/test_sqlite_connection.py index 9dea66157..4f5a2995b 100644 --- a/tests/test_sqlite_connection.py +++ b/tests/test_sqlite_connection.py @@ -13,6 +13,7 @@ DatabaseDialect, load_database_context, ) +from pydough.errors import PyDoughSessionException def test_query_execution(sqlite_people_jobs: DatabaseConnection) -> None: @@ -56,7 +57,9 @@ def test_sqlite_context_no_path() -> None: """ Test that we error if a Database path is not provided. """ - with pytest.raises(ValueError, match="SQLite connection requires a database path."): + with pytest.raises( + PyDoughSessionException, match="SQLite connection requires a database path." + ): load_database_context("sqlite") @@ -64,7 +67,7 @@ def test_sqlite_context_wrong_name() -> None: """ Test that we error if the database name is incorrect. """ - with pytest.raises(ValueError, match="Unsupported database: sqlite3"): + with pytest.raises(PyDoughSessionException, match="Unsupported database: sqlite3"): load_database_context("sqlite3", database=":memory:") @@ -94,5 +97,5 @@ def test_unsupported_database() -> None: TODO: Remove when we support mysql or move to a more generic file. 
""" - with pytest.raises(ValueError): + with pytest.raises(PyDoughSessionException): load_database_context("mysql", database=":memory:") diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index 6f2c18507..245288119 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -37,6 +37,7 @@ from pydough.configs import PyDoughConfigs from pydough.conversion import convert_ast_to_relational from pydough.database_connectors import DatabaseContext +from pydough.errors import PyDoughTestingException from pydough.evaluation.evaluate_unqualified import _load_column_selection from pydough.metadata import GraphMetadata from pydough.pydough_operators import get_operator_by_name @@ -241,7 +242,9 @@ def to_string(self) -> str: case "RANKING": return f"{self.name}(by=({', '.join(collation_strings)}), levels={self.levels}{kwargs_str})" case _: - raise Exception(f"Unsupported window function {self.name}") + raise PyDoughTestingException( + f"Unsupported window function {self.name}" + ) def build( self, @@ -267,7 +270,9 @@ def build( self.kwargs, ) case _: - raise Exception(f"Unsupported window function {self.name}") + raise PyDoughTestingException( + f"Unsupported window function {self.name}" + ) class ReferenceInfo(AstNodeTestInfo): @@ -657,7 +662,9 @@ def local_build( children_contexts: list[PyDoughCollectionQDAG] | None = None, ) -> PyDoughCollectionQDAG: if context is None: - raise Exception("Must provide a context when building a WHERE clause.") + raise PyDoughTestingException( + "Must provide a context when building a WHERE clause." 
+ ) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) raw_where: Where = builder.build_where(context, children) cond = self.condition.build(builder, context, children) @@ -689,7 +696,9 @@ def local_build( children_contexts: list[PyDoughCollectionQDAG] | None = None, ) -> PyDoughCollectionQDAG: if context is None: - raise Exception("Must provide a context when building a Singular clause.") + raise PyDoughTestingException( + "Must provide a context when building a Singular clause." + ) raw_singular: Singular = builder.build_singular(context) return raw_singular @@ -727,7 +736,7 @@ def local_build( children_contexts: list[PyDoughCollectionQDAG] | None = None, ) -> PyDoughCollectionQDAG: if context is None: - raise Exception( + raise PyDoughTestingException( "Must provide context and children_contexts when building an ORDER BY clause." ) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) @@ -776,7 +785,7 @@ def local_build( children_contexts: list[PyDoughCollectionQDAG] | None = None, ) -> PyDoughCollectionQDAG: if context is None: - raise Exception( + raise PyDoughTestingException( "Must provide context and children_contexts when building a TOPK clause." 
) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) From 675421c074789b0a74fb8226274dc2d767880a15 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 8 Jul 2025 11:01:57 -0400 Subject: [PATCH 003/143] Added error builder class and integrated for term not found errors --- pydough/configs/session.py | 23 ++++++ pydough/errors/__init__.py | 2 + pydough/errors/pydough_error_builder.py | 37 ++++++++++ pydough/qdag/collections/calculate.py | 5 +- pydough/qdag/collections/collection_access.py | 7 +- pydough/qdag/collections/collection_qdag.py | 72 ++++++++++++------- pydough/qdag/collections/global_context.py | 5 +- pydough/qdag/collections/partition_by.py | 3 +- pydough/qdag/collections/partition_child.py | 8 +-- 9 files changed, 125 insertions(+), 37 deletions(-) create mode 100644 pydough/errors/pydough_error_builder.py diff --git a/pydough/configs/session.py b/pydough/configs/session.py index 128a4a145..ac197e5cd 100644 --- a/pydough/configs/session.py +++ b/pydough/configs/session.py @@ -5,6 +5,7 @@ - The active metadata graph. - Any PyDough configuration for function behavior. - Backend information (SQL dialect, Database connection, etc.) 
+- The error builder used to create and format exceptions In the future this session will also contain other information such as any User Defined registration for additional backend @@ -24,6 +25,7 @@ empty_connection, load_database_context, ) +from pydough.errors import PyDoughErrorBuilder from pydough.metadata import GraphMetadata, parse_json_metadata_from_file from .pydough_configs import PyDoughConfigs @@ -47,6 +49,7 @@ def __init__(self) -> None: self._database: DatabaseContext = DatabaseContext( connection=empty_connection, dialect=DatabaseDialect.ANSI ) + self._error_builder: PyDoughErrorBuilder = PyDoughErrorBuilder() @property def metadata(self) -> GraphMetadata | None: @@ -108,6 +111,26 @@ def database(self, context: DatabaseContext) -> None: """ self._database = context + @property + def error_builder(self) -> PyDoughErrorBuilder: + """ + Get the active error builder. + + Returns: + The active error builder. + """ + return self._error_builder + + @error_builder.setter + def error_builder(self, builder: PyDoughErrorBuilder) -> None: + """ + Set the active error builder context. + + Args: + The error builder to set. + """ + self._error_builder = builder + def connect_database(self, database_name: str, **kwargs) -> DatabaseContext: """ Create a new DatabaseContext and register it in the session. 
This returns diff --git a/pydough/errors/__init__.py b/pydough/errors/__init__.py index ea9668f25..2fc116db0 100644 --- a/pydough/errors/__init__.py +++ b/pydough/errors/__init__.py @@ -3,6 +3,7 @@ """ __all__ = [ + "PyDoughErrorBuilder", "PyDoughException", "PyDoughMetadataException", "PyDoughQDAGException", @@ -23,3 +24,4 @@ PyDoughTypeException, PyDoughUnqualifiedException, ) +from .pydough_error_builder import PyDoughErrorBuilder diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py new file mode 100644 index 000000000..cd6f423e0 --- /dev/null +++ b/pydough/errors/pydough_error_builder.py @@ -0,0 +1,37 @@ +""" +Definition of the base class for creating exceptions in PyDough. +""" + +from typing import TYPE_CHECKING + +from pydough.errors import PyDoughException, PyDoughQDAGException + +if TYPE_CHECKING: + from pydough.qdag import PyDoughCollectionQDAG + + +class PyDoughErrorBuilder: + """ + Base class for creating exceptions in PyDough. This class provides an + interface that the internals of PyDough will call to create various + exceptions. An instance of this class is installed in the PyDough active + session, telling PyDough how to create exceptions and what their messages + should contain for most situations. A subclass can be created and installed + into the session to customize the error messages. + """ + + def term_not_found( + self, collection: "PyDoughCollectionQDAG", term_name: str + ) -> PyDoughException: + """ + Creates an exception for when a term is not found in the specified collection. + + Args: + `collection`: The collection in which the term was not found. + `term_name` The name of the term that was not found. + Returns: + An exception indicating that the term was not found. 
+ """ + return PyDoughQDAGException( + collection.name_mismatch_error(term_name, atol=2, rtol=0.1, min_names=3) + ) diff --git a/pydough/qdag/collections/calculate.py b/pydough/qdag/collections/calculate.py index 7d6f6cfd9..661e4f7db 100644 --- a/pydough/qdag/collections/calculate.py +++ b/pydough/qdag/collections/calculate.py @@ -9,6 +9,7 @@ from functools import cache +import pydough from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( @@ -147,10 +148,10 @@ def get_expression_position(self, expr_name: str) -> int: return self.calc_term_indices[expr_name] def get_term(self, term_name: str) -> PyDoughQDAG: # type: ignore + if term_name not in self.all_terms: + raise pydough.active_session.error_builder.term_not_found(self, term_name) if term_name in self.calc_term_values: return self.calc_term_values[term_name] - elif term_name not in self.all_terms: - raise PyDoughQDAGException(self.name_mismatch_error(term_name)) return super().get_term(term_name) diff --git a/pydough/qdag/collections/collection_access.py b/pydough/qdag/collections/collection_access.py index 7b0b06807..b4dcd1bec 100644 --- a/pydough/qdag/collections/collection_access.py +++ b/pydough/qdag/collections/collection_access.py @@ -8,6 +8,7 @@ from functools import cache +import pydough from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, @@ -107,6 +108,9 @@ def get_expression_position(self, expr_name: str) -> int: @cache def get_term(self, term_name: str) -> PyDoughQDAG: + if term_name not in self.all_terms: + raise pydough.active_session.error_builder.term_not_found(self, term_name) + # Special handling of terms down-streamed from an ancestor CALCULATE # clause. 
if term_name in self.ancestral_mapping: @@ -131,9 +135,6 @@ def get_term(self, term_name: str) -> PyDoughQDAG: context = context.ancestor_context return Reference(context, term_name) - if term_name not in self.all_terms: - raise PyDoughQDAGException(self.name_mismatch_error(term_name)) - return self.get_term_from_property(term_name) def get_term_from_property(self, term_name: str) -> PyDoughQDAG: diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index c91634d62..3c8360a30 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -5,7 +5,6 @@ __all__ = ["PyDoughCollectionQDAG"] -import re from abc import abstractmethod from collections.abc import Iterable from functools import cache, cached_property @@ -362,16 +361,25 @@ def to_tree_string(self) -> str: """ return "\n".join(self.to_tree_form(True).to_string_rows()) - def find_possible_name_matches(self, term_name: str) -> list[str]: + def find_possible_name_matches( + self, term_name: str, atol: int, rtol: float, min_names: int + ) -> list[str]: """ Finds and returns a list of candidate names that closely match the given name based on minimum edit distance. Args: - name (str): The name to match against the list of candidates. + `term_name`: The name to match against the list of candidates. + `atol`: The absolute tolerance for the minimum edit distance; any + candidate with a minimum edit distance less than or equal to + `closest_match + atol` will be included in the results. + `rtol`: The relative tolerance for the minimum edit distance; any + candidate with a minimum edit distance less than or equal to + `closest_match * (1 + rtol)` will be included in the results. + `min_names`: The minimum number of names to return. Returns: - list[str]: A list of candidate names, based on the closest matches. + A list of candidate names, based on the closest matches. 
""" terms_distance_list: list[tuple[float, str]] = [] @@ -388,27 +396,25 @@ def find_possible_name_matches(self, term_name: str) -> list[str]: closest_match = terms_distance_list[0] - # List with all names that have a me <= closest_match + 2 - matches_within_2: list[str] = [] - # List with all names that have a me <= closest_match * 1.1 - matches_within_10_pct: list[str] = [] - # List with the top 3 closest matches (me) breaking ties by name - matches_top_3: list[str] = [name for _, name in terms_distance_list[:3]] + # List with all names that have a me <= closest_match + atol + matches_within_atol: list[str] = [ + name for me, name in terms_distance_list if me <= closest_match[0] + atol + ] - # filtering the result - for me, name in terms_distance_list: - # all names that have a me <= closest_match + 2 - if me <= closest_match[0] + 2: - matches_within_2.append(name) + # List with all names that have a me <= closest_match * 1.1 + matches_within_rtol: list[str] = [ + name + for me, name in terms_distance_list + if me <= closest_match[0] * (1 + rtol) + ] - # all names that have a me <= closest_match * 1.1 - if me <= closest_match[0] * 1.1: - matches_within_10_pct.append(name) + # List with the top 3 closest matches (me) breaking ties by name + min_matches: list[str] = [name for _, name in terms_distance_list[:min_names]] - # returning the larger - # using + # Return whichever of the three lists is the longest, breaking ties + # lexicographically by the names within. 
return max( - [matches_within_2, matches_within_10_pct, matches_top_3], + [matches_within_atol, matches_within_rtol, min_matches], key=lambda x: (len(x), x), ) @@ -471,20 +477,36 @@ def min_edit_distance(s: str, t: str) -> float: return arr[previousRow, m] # Return the last computed row's last element - def name_mismatch_error(self, term_name: str) -> str: + def name_mismatch_error( + self, term_name: str, atol: int = 2, rtol: float = 0.1, min_names: int = 3 + ) -> str: """ Raises a name mismatch error with suggestions if possible. + Args: - term_name (str): The name of the term that caused the error. + `term_name`: The name of the term that caused the error. + `atol`: The absolute tolerance for the minimum edit distance when + determining whether to include a term as a suggestion; any term + names with a minimum edit distance less than or equal to + `closest_match + atol` will be included as a suggestion. + `rtol`: The relative tolerance for the minimum edit distance when + determining whether to include a term as a suggestion; any term + names with a minimum edit distance less than or equal to + `closest_match * (1 + rtol)` will be included as a suggestion. + `min_names`: The minimum number of suggestions to include. + + Returns: + A string describing the error, including suggestions if available. """ error_message: str = f"Unrecognized term of {self.to_string()}: {term_name!r}." - suggestions: list[str] = self.find_possible_name_matches(term_name=term_name) + suggestions: list[str] = self.find_possible_name_matches( + term_name=term_name, atol=atol, rtol=rtol, min_names=min_names + ) # Check if there are any suggestions to add if len(suggestions) > 0: suggestions_str: str = ", ".join(suggestions) error_message += f" Did you mean: {suggestions_str}?" 
- re.escape(error_message) return error_message diff --git a/pydough/qdag/collections/global_context.py b/pydough/qdag/collections/global_context.py index 0be884f0b..f16999388 100644 --- a/pydough/qdag/collections/global_context.py +++ b/pydough/qdag/collections/global_context.py @@ -7,6 +7,7 @@ __all__ = ["TableCollection"] +import pydough from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, @@ -115,8 +116,8 @@ def get_expression_position(self, expr_name: str) -> int: raise PyDoughQDAGException(f"Cannot call get_expression_position on {self!r}") def get_term(self, term_name: str) -> PyDoughQDAG: - if term_name not in self.collections: - raise PyDoughQDAGException(self.name_mismatch_error(term_name)) + if term_name not in self.all_terms: + raise pydough.active_session.error_builder.term_not_found(self, term_name) return self.collections[term_name] diff --git a/pydough/qdag/collections/partition_by.py b/pydough/qdag/collections/partition_by.py index c2554b83f..7261c6116 100644 --- a/pydough/qdag/collections/partition_by.py +++ b/pydough/qdag/collections/partition_by.py @@ -9,6 +9,7 @@ from functools import cache +import pydough from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( @@ -188,7 +189,7 @@ def get_term(self, term_name: str) -> PyDoughQDAG: elif term_name == self.child.name: return PartitionChild(self.child, self.child.name, self) else: - raise PyDoughQDAGException(self.name_mismatch_error(term_name)) + raise pydough.active_session.error_builder.term_not_found(self, term_name) def to_tree_form(self, is_last: bool) -> CollectionTreeForm: predecessor: CollectionTreeForm = self.ancestor_context.to_tree_form(is_last) diff --git a/pydough/qdag/collections/partition_child.py b/pydough/qdag/collections/partition_child.py index 138dbcf3e..607525522 100644 --- a/pydough/qdag/collections/partition_child.py +++ 
b/pydough/qdag/collections/partition_child.py @@ -8,7 +8,7 @@ from functools import cache -from pydough.errors import PyDoughQDAGException +import pydough from pydough.qdag.expressions import ( BackReferenceExpression, CollationExpression, @@ -90,6 +90,9 @@ def inherited_downstreamed_terms(self) -> set[str]: @cache def get_term(self, term_name: str): + if term_name not in self.all_terms: + raise pydough.active_session.error_builder.term_not_found(self, term_name) + if term_name in self.ancestral_mapping: return BackReferenceExpression( self, term_name, self.ancestral_mapping[term_name] @@ -104,9 +107,6 @@ def get_term(self, term_name: str): context = context.ancestor_context return Reference(context, term_name) - elif term_name not in self.all_terms: - raise PyDoughQDAGException(self.name_mismatch_error(term_name)) - return super().get_term(term_name) def is_singular(self, context: PyDoughCollectionQDAG) -> bool: From 097b8a0b80d5fd51bf7c3285bb0ca3e054d1be66 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 8 Jul 2025 11:28:29 -0400 Subject: [PATCH 004/143] Minor refactor to term_not_found error usage [RUN CI] --- pydough/qdag/collections/calculate.py | 4 +--- pydough/qdag/collections/collection_access.py | 4 +--- pydough/qdag/collections/collection_qdag.py | 18 ++++++++++++++++++ pydough/qdag/collections/global_context.py | 5 +---- pydough/qdag/collections/partition_by.py | 9 ++++----- pydough/qdag/collections/partition_child.py | 5 +---- 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/pydough/qdag/collections/calculate.py b/pydough/qdag/collections/calculate.py index 661e4f7db..b4d48664b 100644 --- a/pydough/qdag/collections/calculate.py +++ b/pydough/qdag/collections/calculate.py @@ -9,7 +9,6 @@ from functools import cache -import pydough from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( @@ -148,8 +147,7 @@ def get_expression_position(self, 
expr_name: str) -> int: return self.calc_term_indices[expr_name] def get_term(self, term_name: str) -> PyDoughQDAG: # type: ignore - if term_name not in self.all_terms: - raise pydough.active_session.error_builder.term_not_found(self, term_name) + self.verify_term_exists(term_name) if term_name in self.calc_term_values: return self.calc_term_values[term_name] diff --git a/pydough/qdag/collections/collection_access.py b/pydough/qdag/collections/collection_access.py index b4dcd1bec..75f167e6e 100644 --- a/pydough/qdag/collections/collection_access.py +++ b/pydough/qdag/collections/collection_access.py @@ -8,7 +8,6 @@ from functools import cache -import pydough from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, @@ -108,8 +107,7 @@ def get_expression_position(self, expr_name: str) -> int: @cache def get_term(self, term_name: str) -> PyDoughQDAG: - if term_name not in self.all_terms: - raise pydough.active_session.error_builder.term_not_found(self, term_name) + self.verify_term_exists(term_name) # Special handling of terms down-streamed from an ancestor CALCULATE # clause. diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index 3c8360a30..2c4bb7610 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -12,6 +12,7 @@ import numpy as np +import pydough from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions.collation_expression import CollationExpression @@ -510,3 +511,20 @@ def name_mismatch_error( error_message += f" Did you mean: {suggestions_str}?" return error_message + + def verify_term_exists(self, term_name: str) -> None: + """ + Verifies that a term exists in the collection, and raises an exception + if it does not. + + Args: + `term_name`: The name of the term to check whether it exists within + the collection. 
+ + Raises: + `PyDoughException` if the term does not exist in the collection. + """ + if term_name not in self.all_terms: + raise pydough.active_session.error_builder.term_not_found( + collection=self, term_name=term_name + ) diff --git a/pydough/qdag/collections/global_context.py b/pydough/qdag/collections/global_context.py index f16999388..af034810c 100644 --- a/pydough/qdag/collections/global_context.py +++ b/pydough/qdag/collections/global_context.py @@ -7,7 +7,6 @@ __all__ = ["TableCollection"] -import pydough from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, @@ -116,9 +115,7 @@ def get_expression_position(self, expr_name: str) -> int: raise PyDoughQDAGException(f"Cannot call get_expression_position on {self!r}") def get_term(self, term_name: str) -> PyDoughQDAG: - if term_name not in self.all_terms: - raise pydough.active_session.error_builder.term_not_found(self, term_name) - + self.verify_term_exists(term_name) return self.collections[term_name] @property diff --git a/pydough/qdag/collections/partition_by.py b/pydough/qdag/collections/partition_by.py index 7261c6116..0d315f213 100644 --- a/pydough/qdag/collections/partition_by.py +++ b/pydough/qdag/collections/partition_by.py @@ -9,7 +9,6 @@ from functools import cache -import pydough from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( @@ -183,13 +182,13 @@ def get_term(self, term_name: str) -> PyDoughQDAG: return BackReferenceExpression( self, term_name, self.ancestral_mapping[term_name] ) - elif term_name in self._key_name_indices: - term: PartitionKey = self.keys[self._key_name_indices[term_name]] - return term elif term_name == self.child.name: return PartitionChild(self.child, self.child.name, self) else: - raise pydough.active_session.error_builder.term_not_found(self, term_name) + self.verify_term_exists(term_name) + assert term_name in self._key_name_indices 
+ term: PartitionKey = self.keys[self._key_name_indices[term_name]] + return term def to_tree_form(self, is_last: bool) -> CollectionTreeForm: predecessor: CollectionTreeForm = self.ancestor_context.to_tree_form(is_last) diff --git a/pydough/qdag/collections/partition_child.py b/pydough/qdag/collections/partition_child.py index 607525522..e10222ebf 100644 --- a/pydough/qdag/collections/partition_child.py +++ b/pydough/qdag/collections/partition_child.py @@ -8,7 +8,6 @@ from functools import cache -import pydough from pydough.qdag.expressions import ( BackReferenceExpression, CollationExpression, @@ -90,9 +89,7 @@ def inherited_downstreamed_terms(self) -> set[str]: @cache def get_term(self, term_name: str): - if term_name not in self.all_terms: - raise pydough.active_session.error_builder.term_not_found(self, term_name) - + self.verify_term_exists(term_name) if term_name in self.ancestral_mapping: return BackReferenceExpression( self, term_name, self.ancestral_mapping[term_name] From 18ba964e7cd5a09a8e4c9efb48009538a3607a26 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 9 Jul 2025 12:38:02 -0400 Subject: [PATCH 005/143] WIP --- pydough/conversion/filter_pushdown.py | 8 +- pydough/conversion/merge_projects.py | 11 +- pydough/conversion/projection_pullup.py | 143 ++++++++++++++++++ pydough/conversion/relational_converter.py | 22 +++ pydough/relational/rel_util.py | 15 +- tests/test_plan_refsols/correl_27.txt | 14 +- tests/test_plan_refsols/correl_28.txt | 14 +- tests/test_plan_refsols/correl_30.txt | 8 +- .../parts_quantity_increase_95_96.txt | 22 ++- tests/test_plan_refsols/semi_aggregate.txt | 15 +- tests/test_plan_refsols/tpch_q20.txt | 8 +- tests/test_plan_refsols/tpch_q5.txt | 34 ++--- .../defog_dealership_adv11_ansi.sql | 18 +-- .../defog_dealership_adv11_sqlite.sql | 18 +-- .../defog_dealership_adv2_ansi.sql | 6 +- .../defog_dealership_adv2_sqlite.sql | 6 +- .../defog_ewallet_adv10_ansi.sql | 4 +- .../defog_ewallet_adv10_sqlite.sql | 4 +- 
.../defog_ewallet_adv16_ansi.sql | 4 +- .../defog_ewallet_adv16_sqlite.sql | 4 +- .../defog_ewallet_adv1_ansi.sql | 9 +- .../defog_ewallet_adv1_sqlite.sql | 9 +- .../defog_ewallet_adv3_ansi.sql | 4 +- .../defog_ewallet_adv3_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q20_ansi.sql | 8 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 8 +- tests/test_sql_refsols/tpch_q5_ansi.sql | 57 +++---- tests/test_sql_refsols/tpch_q5_sqlite.sql | 57 +++---- 28 files changed, 329 insertions(+), 205 deletions(-) create mode 100644 pydough/conversion/projection_pullup.py diff --git a/pydough/conversion/filter_pushdown.py b/pydough/conversion/filter_pushdown.py index d50d9da5d..cab872b55 100644 --- a/pydough/conversion/filter_pushdown.py +++ b/pydough/conversion/filter_pushdown.py @@ -48,17 +48,21 @@ def push_filters( """ remaining_filters: set[RelationalExpression] pushable_filters: set[RelationalExpression] + new_input: RelationalNode match node: case Filter(): # Add all of the conditions from the filters pushed down this far # with the filters from the current node. If there is a window # function, materialize all of them at this point, otherwise push # all of them further. 
+ filters = {transpose_expression(expr, node.columns) for expr in filters} filters.update(get_conjunctions(node.condition)) if contains_window(node.condition): - return build_filter(push_filters(node.input, set()), filters) + remaining_filters, pushable_filters = filters, set() else: - return push_filters(node.input, filters) + remaining_filters, pushable_filters = set(), filters + new_input = push_filters(node.input, pushable_filters) + return build_filter(new_input, remaining_filters, columns=node.columns) case Project(): if any(contains_window(expr) for expr in node.columns.values()): # If there is a window function, materialize all filters at diff --git a/pydough/conversion/merge_projects.py b/pydough/conversion/merge_projects.py index 363117af5..7a22b2cad 100644 --- a/pydough/conversion/merge_projects.py +++ b/pydough/conversion/merge_projects.py @@ -267,12 +267,15 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: return node -def merge_projects(node: RelationalNode) -> RelationalNode: +def merge_projects( + node: RelationalNode, push_into_joins: bool = True +) -> RelationalNode: """ Merge adjacent projections when beneficial. Args: `node`: The current node of the relational tree. + `push_into_joins`: If True, push projections into joins when possible. Returns: The transformed version of `node` with adjacent projections merged @@ -281,11 +284,13 @@ def merge_projects(node: RelationalNode) -> RelationalNode: """ # If there is a project on top of a join, attempt to push it down into the # inputs of the join. - if isinstance(node, Project) and isinstance(node.input, Join): + if isinstance(node, Project) and isinstance(node.input, Join) and push_into_joins: node = project_join_transpose(node) # Recursively invoke the procedure on all inputs to the node. 
- node = node.copy(inputs=[merge_projects(input) for input in node.inputs]) + node = node.copy( + inputs=[merge_projects(input, push_into_joins) for input in node.inputs] + ) # Invoke the main merging step if the current node is a root/projection, # potentially multiple times if the projection below it that gets deleted diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py new file mode 100644 index 000000000..88bc2d6ef --- /dev/null +++ b/pydough/conversion/projection_pullup.py @@ -0,0 +1,143 @@ +""" +Logic used to pull up projections in the relational plan so function calls +happen as late as possible, ideally after filters, filtering joins, and +aggregations. +""" + +__all__ = ["pullup_projections"] + + +from pydough.relational import ( + CallExpression, + ColumnReference, + Filter, + Join, + JoinType, + LiteralExpression, + Project, + RelationalExpression, + RelationalNode, + RelationalRoot, +) +from pydough.relational.rel_util import apply_substitution, contains_window +from pydough.relational.relational_expressions.column_reference_finder import ( + ColumnReferenceFinder, +) + +from .merge_projects import merge_adjacent_projects + + +def pull_non_columns(node: RelationalNode) -> RelationalNode: + """ + TODO + """ + new_node_columns: dict[str, RelationalExpression] = {} + new_project_columns: dict[str, RelationalExpression] = {} + needs_pull: bool = False + + for name, expr in node.columns.items(): + new_node_columns[name] = expr + match expr: + case ColumnReference(): + new_project_columns[name] = ColumnReference(name, expr.data_type) + case LiteralExpression() | CallExpression(): + new_project_columns[name] = expr + needs_pull = True + case _: + raise NotImplementedError( + f"Unsupported expression type {expr.__class__.__name__} in join columns." 
+ ) + + if not needs_pull: + return node + + new_input: RelationalNode = node.copy(columns=new_node_columns) + return Project(input=new_input, columns=new_project_columns) + + +def pull_project_into_join(node: Join, input_index: int) -> None: + """ + TODO + """ + if not isinstance(node.inputs[input_index], Project): + return + + +def pull_project_into_filter(node: Filter) -> None: + """ + TODO + """ + if not isinstance(node.input, Project): + return + + project: Project = node.input + + finder: ColumnReferenceFinder = ColumnReferenceFinder() + finder.reset() + node.condition.accept(finder) + condition_cols: set[ColumnReference] = finder.get_column_references() + condition_names: set[str] = {col.name for col in condition_cols} + + ref_expr: ColumnReference + new_ref: ColumnReference + new_project_columns: dict[str, RelationalExpression] = {} + used_cols: set[RelationalExpression] = set() + transfer_substitutions: dict[RelationalExpression, RelationalExpression] = {} + for name, expr in project.columns.items(): + new_project_columns[name] = expr + used_cols.add(expr) + for name, expr in project.input.columns.items(): + ref_expr = ColumnReference(name, expr.data_type) + if name in condition_names: + continue + if ref_expr not in used_cols: + new_name: str = name + idx: int = 0 + while new_name in new_project_columns: + idx += 1 + new_name = f"{name}_{idx}" + new_ref = ColumnReference(name, expr.data_type) + new_project_columns[new_name] = new_ref + transfer_substitutions[ref_expr] = new_ref + + node._input = project.copy(columns=new_project_columns) + + cond_contains_window: bool = contains_window(node.condition) + substitutions: dict[RelationalExpression, RelationalExpression] = {} + existing_outputs: set[RelationalExpression] = set(node.columns.values()) + new_filter_columns: dict[str, RelationalExpression] = {} + for name, expr in project.columns.items(): + ref_expr = ColumnReference(name, expr.data_type) + new_filter_columns[name] = expr + new_expr: 
RelationalExpression = apply_substitution( + expr, transfer_substitutions, {} + ) + if not (cond_contains_window and contains_window(new_expr)): + if name in condition_names: + if ref_expr not in existing_outputs: + substitutions[ref_expr] = new_expr + else: + new_filter_columns[name] = new_expr + node._condition = apply_substitution(node.condition, substitutions, {}) + node._columns = new_filter_columns + + +def pullup_projections(node: RelationalNode) -> RelationalNode: + """ + TODO + """ + # Recursively invoke the procedure on all inputs to the node. + node = node.copy(inputs=[pullup_projections(input) for input in node.inputs]) + match node: + case RelationalRoot() | Project(): + return merge_adjacent_projects(node) + case Join(): + pull_project_into_join(node, 0) + if node.join_type == JoinType.INNER: + pull_project_into_join(node, 1) + return pull_non_columns(node) + case Filter(): + # pull_project_into_filter(node) + return pull_non_columns(node) + case _: + return node diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index c990260c8..b2f0372a4 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1447,6 +1447,28 @@ def optimize_relational_tree( # Step 9: re-run projection merging. root = confirm_root(merge_projects(root)) + """ + # Step 6: bubble up names from the leaf nodes to further encourage simpler + # naming without aliases, and also to delete duplicate columns where + # possible. + root = bubble_column_names(root) + + # Step 7: run projection pullup. + root = confirm_root(pullup_projections(root)) + + # Step 8: prune unused columns. + root = ColumnPruner().prune_unused_columns(root) + + # Step 9: re-run filter pushdown + root._input = push_filters(root.input, set()) + + # Step 10: re-run projection merging, without pushing into joins. + root = confirm_root(merge_projects(root, push_into_joins=False)) + + # Step 11: re-run column pruning. 
+ root = ColumnPruner().prune_unused_columns(root) + """ + return root diff --git a/pydough/relational/rel_util.py b/pydough/relational/rel_util.py index b2681b181..c0423f91e 100644 --- a/pydough/relational/rel_util.py +++ b/pydough/relational/rel_util.py @@ -38,6 +38,7 @@ Filter, Join, JoinType, + Project, RelationalNode, ) @@ -244,7 +245,9 @@ def passthrough_column_mapping(node: RelationalNode) -> dict[str, RelationalExpr def build_filter( - node: RelationalNode, filters: set[RelationalExpression] + node: RelationalNode, + filters: set[RelationalExpression], + columns: dict[str, RelationalExpression] | None = None, ) -> RelationalNode: """ Build a filter node with the given filters on top of an input node. @@ -252,6 +255,9 @@ def build_filter( Args: `node`: The input node to build the filter on top of. `filters`: The set of filters to apply. + `columns`: An optional mapping of the column mapping to use on the + built filter node. If not provided, uses the passthrough column mapping + of `node`. Returns: A filter node with the given filters applied on top of `node`. If @@ -263,6 +269,9 @@ def build_filter( filters.discard(LiteralExpression(True, BooleanType())) condition: RelationalExpression if len(filters) == 0: + # If columns was provided, use it to create a Project node + if columns is not None: + return Project(node, columns) return node # Detect whether the filter can be pushed into a join condition. If so, @@ -301,7 +310,9 @@ def build_filter( # Otherwise, just return a new filter node with the new condition on top # of the existing node. 
- return Filter(node, condition, passthrough_column_mapping(node)) + if columns is None: + columns = passthrough_column_mapping(node) + return Filter(node, condition, columns) def transpose_expression( diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 2923d9a3c..1d43f55ec 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) - JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_selected_purchases': t0.n_selected_purchases, 'nation_name': t0.nation_name}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey), 'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_selected_purchases': t0.n_selected_purchases}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_selected_purchases': 1:numeric, 'o_orderkey': o_orderkey}) +ROOT(columns=[('nation_name', anything_anything_n_name), 
('n_selected_purchases', sum_sum_agg_0)], orderings=[(anything_anything_n_name):asc_first]) + JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_0': t0.agg_0, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + PROJECT(columns={'agg_0': 1:numeric, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git 
a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index ffeb0deba..8c226d189 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) - JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_selected_purchases': t0.n_selected_purchases, 'nation_name': t0.nation_name}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey), 'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_selected_purchases': t0.n_selected_purchases}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_selected_purchases': 1:numeric, 'o_orderkey': o_orderkey}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases', sum_sum_agg_0)], orderings=[(anything_anything_n_name):asc_first]) + JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_0': t0.agg_0, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + PROJECT(columns={'agg_0': 1:numeric, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 8da8d15fa..46f53cd79 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('region_name', region_name), ('nation_name', nation_name), 
('n_above_avg_customers', n_above_avg_customers), ('n_above_avg_suppliers', n_above_avg_suppliers)], orderings=[(region_name):asc_first, (nation_name):asc_first]) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_above_avg_customers': t0.n_above_avg_customers, 'n_above_avg_suppliers': t1.n_above_avg_suppliers, 'nation_name': t0.nation_name, 'region_name': t0.region_name}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_nationkey': ANYTHING(n_nationkey), 'n_above_avg_customers': COUNT(), 'nation_name': ANYTHING(n_name), 'region_name': ANYTHING(region_name)}) +ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) + JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(region_name), 'n_rows': COUNT()}) FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': region_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'region_name': t0.region_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'region_name': t1.region_name}) @@ -15,7 +15,7 @@ ROOT(columns=[('region_name', 
region_name), ('nation_name', nation_name), ('n_ab FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_above_avg_suppliers': COUNT()}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index c20fcf841..391c0d4fb 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,21 +1,19 @@ ROOT(columns=[('name', p_name), ('qty_95', qty_95), ('qty_96', qty_96)], orderings=[(ordering_2):desc_last, (p_name):asc_first]) LIMIT(limit=Literal(value=3, type=NumericType()), columns={'ordering_2': ordering_2, 'p_name': p_name, 'qty_95': qty_95, 'qty_96': qty_96}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) - PROJECT(columns={'ordering_2': qty_96 - qty_95, 'p_name': p_name, 'qty_95': qty_95, 'qty_96': qty_96}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'qty_95': t0.qty_95, 'qty_96': t1.qty_96}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'qty_95': t1.qty_95}) + PROJECT(columns={'ordering_2': DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric), 'p_name': p_name, 'qty_95': DEFAULT_TO(sum_l_quantity, 0:numeric), 'qty_96': DEFAULT_TO(agg_1, 0:numeric)}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - PROJECT(columns={'l_partkey': l_partkey, 'qty_95': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - PROJECT(columns={'l_partkey': l_partkey, 'qty_96': DEFAULT_TO(sum_l_quantity, 0:numeric)}) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) 
FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_orderkey': o_orderkey}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt b/tests/test_plan_refsols/semi_aggregate.txt index 001fda1d3..8b83d01e6 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', sum_price_of_10parts)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_price_of_10parts': t1.sum_price_of_10parts}) +ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), 
('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - PROJECT(columns={'avg_p_retailprice': avg_p_retailprice, 'n_rows': n_rows, 'ps_suppkey': ps_suppkey, 'sum_price_of_10parts': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index d179a91c2..f025d4504 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ 
b/tests/test_plan_refsols/tpch_q20.txt @@ -7,13 +7,13 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(agg_0, 0:numeric), columns={'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'part_qty': t1.part_qty, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'part_qty': t1.part_qty}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - PROJECT(columns={'agg_0': DEFAULT_TO(sum_l_quantity, 0:numeric), 'l_partkey': l_partkey}) + PROJECT(columns={'l_partkey': l_partkey, 'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric)}) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': 
l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index afb898a11..a6f56996d 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,21 +1,19 @@ -ROOT(columns=[('N_NAME', anything_anything_n_name), ('REVENUE', REVENUE)], orderings=[(REVENUE):desc_last]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_sum_sum_value, 0:numeric), 'anything_anything_n_name': anything_anything_n_name}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'sum_sum_sum_value': SUM(sum_sum_value)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'n_nationkey': t0.n_nationkey, 'sum_sum_value': t0.sum_sum_value}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_sum_value': SUM(sum_value)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'sum_value': t1.sum_value}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, 
columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_value': SUM(value)}) - PROJECT(columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'value': l_extendedprice * 1:numeric - l_discount}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) +ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', REVENUE)], orderings=[(REVENUE):desc_last]) + PROJECT(columns={'REVENUE': DEFAULT_TO(sum_value, 0:numeric), 'anything_n_name': anything_n_name}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'value': t0.value}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'value': t1.value}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + PROJECT(columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'value': l_extendedprice * 1:numeric - l_discount}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql index 2f90c30b7..649382f8c 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql @@ -1,19 +1,11 @@ -WITH _s0 AS ( - SELECT - SUM(sale_price) AS sum_sale_price, - car_id - FROM main.sales - WHERE - EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 - GROUP BY - car_id -) SELECT ( ( - COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + COALESCE(SUM(sales.sale_price), 0) - COALESCE(SUM(cars.cost), 0) ) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _s0 AS _s0 +FROM 
main.sales AS sales JOIN main.cars AS cars - ON _s0.car_id = cars._id + ON cars._id = sales.car_id +WHERE + EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 diff --git a/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql index 3a7f5ebec..95c7af48e 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql @@ -1,19 +1,11 @@ -WITH _s0 AS ( - SELECT - SUM(sale_price) AS sum_sale_price, - car_id - FROM main.sales - WHERE - CAST(STRFTIME('%Y', sale_date) AS INTEGER) = 2023 - GROUP BY - car_id -) SELECT ( CAST(( - COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + COALESCE(SUM(sales.sale_price), 0) - COALESCE(SUM(cars.cost), 0) ) AS REAL) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _s0 AS _s0 +FROM main.sales AS sales JOIN main.cars AS cars - ON _s0.car_id = cars._id + ON cars._id = sales.car_id +WHERE + CAST(STRFTIME('%Y', sales.sale_date) AS INTEGER) = 2023 diff --git a/tests/test_sql_refsols/defog_dealership_adv2_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv2_ansi.sql index 59cbeb190..776eae11f 100644 --- a/tests/test_sql_refsols/defog_dealership_adv2_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv2_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS num_sales, + COUNT(*) AS n_rows, salesperson_id FROM main.sales WHERE @@ -12,9 +12,9 @@ SELECT salespersons._id, salespersons.first_name, salespersons.last_name, - _s1.num_sales + _s1.n_rows AS num_sales FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - num_sales DESC + _s1.n_rows DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv2_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv2_sqlite.sql index 71037cdc7..2f836e759 100644 --- a/tests/test_sql_refsols/defog_dealership_adv2_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv2_sqlite.sql 
@@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS num_sales, + COUNT(*) AS n_rows, salesperson_id FROM main.sales WHERE @@ -14,9 +14,9 @@ SELECT salespersons._id, salespersons.first_name, salespersons.last_name, - _s1.num_sales + _s1.n_rows AS num_sales FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - num_sales DESC + _s1.n_rows DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql index aab001a76..38f66aaba 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_transactions, + COUNT(*) AS n_rows, sender_id FROM main.wallet_transactions_daily WHERE @@ -10,7 +10,7 @@ WITH _s1 AS ( ) SELECT users.uid AS user_id, - _s1.total_transactions + _s1.n_rows AS total_transactions FROM main.users AS users JOIN _s1 AS _s1 ON _s1.sender_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql index aab001a76..38f66aaba 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_transactions, + COUNT(*) AS n_rows, sender_id FROM main.wallet_transactions_daily WHERE @@ -10,7 +10,7 @@ WITH _s1 AS ( ) SELECT users.uid AS user_id, - _s1.total_transactions + _s1.n_rows AS total_transactions FROM main.users AS users JOIN _s1 AS _s1 ON _s1.sender_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql index 5dec70a8c..9f171f00c 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_unread_notifs, + COUNT(*) AS n_rows, user_id FROM 
main.notifications WHERE @@ -10,7 +10,7 @@ WITH _s1 AS ( ) SELECT users.username, - _s1.total_unread_notifs + _s1.n_rows AS total_unread_notifs FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql index 5dec70a8c..9f171f00c 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_unread_notifs, + COUNT(*) AS n_rows, user_id FROM main.notifications WHERE @@ -10,7 +10,7 @@ WITH _s1 AS ( ) SELECT users.username, - _s1.total_unread_notifs + _s1.n_rows AS total_unread_notifs FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql index cea476af4..05ebb109f 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql @@ -1,8 +1,7 @@ WITH _s1 AS ( SELECT - ( - COUNT(DISTINCT coupon_id) * 1.0 - ) / COUNT(DISTINCT txid) AS cpur, + COUNT(DISTINCT coupon_id) AS ndistinct_coupon_id, + COUNT(DISTINCT txid) AS ndistinct_txid, receiver_id FROM main.wallet_transactions_daily WHERE @@ -12,7 +11,9 @@ WITH _s1 AS ( ) SELECT merchants.name, - _s1.cpur AS CPUR + ( + _s1.ndistinct_coupon_id * 1.0 + ) / _s1.ndistinct_txid AS CPUR FROM main.merchants AS merchants JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql index d57bb5e21..c4b8b97ad 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql @@ -1,8 +1,7 @@ WITH _s1 AS ( SELECT - CAST(( - COUNT(DISTINCT coupon_id) * 1.0 - ) AS REAL) / COUNT(DISTINCT txid) AS cpur, + COUNT(DISTINCT coupon_id) AS 
ndistinct_coupon_id, + COUNT(DISTINCT txid) AS ndistinct_txid, receiver_id FROM main.wallet_transactions_daily WHERE @@ -12,7 +11,9 @@ WITH _s1 AS ( ) SELECT merchants.name, - _s1.cpur AS CPUR + CAST(( + _s1.ndistinct_coupon_id * 1.0 + ) AS REAL) / _s1.ndistinct_txid AS CPUR FROM main.merchants AS merchants JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql index 0b4530b3c..2806ddea0 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_coupons, + COUNT(*) AS n_rows, merchant_id FROM main.coupons GROUP BY @@ -8,7 +8,7 @@ WITH _s1 AS ( ) SELECT merchants.name AS merchant_name, - _s1.total_coupons + _s1.n_rows AS total_coupons FROM main.merchants AS merchants JOIN _s1 AS _s1 ON _s1.merchant_id = merchants.mid diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql index 0b4530b3c..2806ddea0 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_coupons, + COUNT(*) AS n_rows, merchant_id FROM main.coupons GROUP BY @@ -8,7 +8,7 @@ WITH _s1 AS ( ) SELECT merchants.name AS merchant_name, - _s1.total_coupons + _s1.n_rows AS total_coupons FROM main.merchants AS merchants JOIN _s1 AS _s1 ON _s1.merchant_id = merchants.mid diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 4bc325b88..6ee033739 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -1,6 +1,6 @@ WITH _s3 AS ( SELECT - COALESCE(SUM(l_quantity), 0) AS agg_0, + COALESCE(SUM(l_quantity), 0) AS part_qty, l_partkey FROM tpch.lineitem WHERE @@ -9,8 +9,8 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( 
SELECT - _s3.agg_0, - part.p_partkey + part.p_partkey, + _s3.part_qty FROM tpch.part AS part JOIN _s3 AS _s3 ON _s3.l_partkey = part.p_partkey @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.agg_0, 0) + 0.5 * COALESCE(_s5.part_qty, 0) ) GROUP BY partsupp.ps_suppkey diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index a5665d63f..ff41af883 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -1,6 +1,6 @@ WITH _s3 AS ( SELECT - COALESCE(SUM(l_quantity), 0) AS agg_0, + COALESCE(SUM(l_quantity), 0) AS part_qty, l_partkey FROM tpch.lineitem WHERE @@ -9,8 +9,8 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( SELECT - _s3.agg_0, - part.p_partkey + part.p_partkey, + _s3.part_qty FROM tpch.part AS part JOIN _s3 AS _s3 ON _s3.l_partkey = part.p_partkey @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.agg_0, 0) + 0.5 * COALESCE(_s5.part_qty, 0) ) GROUP BY partsupp.ps_suppkey diff --git a/tests/test_sql_refsols/tpch_q5_ansi.sql b/tests/test_sql_refsols/tpch_q5_ansi.sql index ede41e1ca..200918b17 100644 --- a/tests/test_sql_refsols/tpch_q5_ansi.sql +++ b/tests/test_sql_refsols/tpch_q5_ansi.sql @@ -1,37 +1,4 @@ -WITH _s7 AS ( - SELECT - SUM(l_extendedprice * ( - 1 - l_discount - )) AS sum_value, - l_orderkey, - l_suppkey - FROM tpch.lineitem - GROUP BY - l_orderkey, - l_suppkey -), _s10 AS ( - SELECT - ANY_VALUE(nation.n_name) AS anything_n_name, - SUM(_s7.sum_value) AS sum_sum_value, - _s7.l_suppkey, - nation.n_name, - nation.n_nationkey - FROM tpch.nation AS nation - JOIN tpch.region AS region - ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' - JOIN tpch.customer AS customer - ON customer.c_nationkey = nation.n_nationkey - JOIN tpch.orders AS orders - ON customer.c_custkey = 
orders.o_custkey - AND orders.o_orderdate < CAST('1995-01-01' AS DATE) - AND orders.o_orderdate >= CAST('1994-01-01' AS DATE) - JOIN _s7 AS _s7 - ON _s7.l_orderkey = orders.o_orderkey - GROUP BY - _s7.l_suppkey, - nation.n_name, - nation.n_nationkey -), _s11 AS ( +WITH _s11 AS ( SELECT nation.n_name, supplier.s_suppkey @@ -40,12 +7,24 @@ WITH _s7 AS ( ON nation.n_nationkey = supplier.s_nationkey ) SELECT - ANY_VALUE(_s10.anything_n_name) AS N_NAME, - COALESCE(SUM(_s10.sum_sum_value), 0) AS REVENUE -FROM _s10 AS _s10 + ANY_VALUE(nation.n_name) AS N_NAME, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey + AND orders.o_orderdate < CAST('1995-01-01' AS DATE) + AND orders.o_orderdate >= CAST('1994-01-01' AS DATE) +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey JOIN _s11 AS _s11 - ON _s10.l_suppkey = _s11.s_suppkey AND _s10.n_name = _s11.n_name + ON _s11.n_name = nation.n_name AND _s11.s_suppkey = lineitem.l_suppkey GROUP BY - _s10.n_nationkey + nation.n_nationkey ORDER BY revenue DESC diff --git a/tests/test_sql_refsols/tpch_q5_sqlite.sql b/tests/test_sql_refsols/tpch_q5_sqlite.sql index 1bae1b7b7..d65176b51 100644 --- a/tests/test_sql_refsols/tpch_q5_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q5_sqlite.sql @@ -1,37 +1,4 @@ -WITH _s7 AS ( - SELECT - SUM(l_extendedprice * ( - 1 - l_discount - )) AS sum_value, - l_orderkey, - l_suppkey - FROM tpch.lineitem - GROUP BY - l_orderkey, - l_suppkey -), _s10 AS ( - SELECT - MAX(nation.n_name) AS anything_n_name, - SUM(_s7.sum_value) AS sum_sum_value, - _s7.l_suppkey, - nation.n_name, - nation.n_nationkey - FROM tpch.nation AS nation - JOIN tpch.region AS region - ON nation.n_regionkey 
= region.r_regionkey AND region.r_name = 'ASIA' - JOIN tpch.customer AS customer - ON customer.c_nationkey = nation.n_nationkey - JOIN tpch.orders AS orders - ON customer.c_custkey = orders.o_custkey - AND orders.o_orderdate < '1995-01-01' - AND orders.o_orderdate >= '1994-01-01' - JOIN _s7 AS _s7 - ON _s7.l_orderkey = orders.o_orderkey - GROUP BY - _s7.l_suppkey, - nation.n_name, - nation.n_nationkey -), _s11 AS ( +WITH _s11 AS ( SELECT nation.n_name, supplier.s_suppkey @@ -40,12 +7,24 @@ WITH _s7 AS ( ON nation.n_nationkey = supplier.s_nationkey ) SELECT - MAX(_s10.anything_n_name) AS N_NAME, - COALESCE(SUM(_s10.sum_sum_value), 0) AS REVENUE -FROM _s10 AS _s10 + MAX(nation.n_name) AS N_NAME, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey + AND orders.o_orderdate < '1995-01-01' + AND orders.o_orderdate >= '1994-01-01' +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey JOIN _s11 AS _s11 - ON _s10.l_suppkey = _s11.s_suppkey AND _s10.n_name = _s11.n_name + ON _s11.n_name = nation.n_name AND _s11.s_suppkey = lineitem.l_suppkey GROUP BY - _s10.n_nationkey + nation.n_nationkey ORDER BY revenue DESC From 684b5d74052ca9114bc4e2d1cc92cd654cec9c36 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 9 Jul 2025 13:02:51 -0400 Subject: [PATCH 006/143] WIP improvements on projection pullup --- pydough/conversion/projection_pullup.py | 17 +++--- pydough/conversion/relational_converter.py | 21 ++------ tests/test_plan_refsols/aggregate_anti.txt | 4 +- tests/test_plan_refsols/anti_aggregate.txt | 4 +- .../anti_aggregate_alternate.txt | 4 +- tests/test_plan_refsols/anti_singular.txt | 4 +- tests/test_plan_refsols/common_prefix_ad.txt | 14 
++--- tests/test_plan_refsols/common_prefix_ak.txt | 2 +- tests/test_plan_refsols/correl_10.txt | 11 ++-- tests/test_plan_refsols/correl_20.txt | 26 +++++---- tests/test_plan_refsols/correl_34.txt | 2 +- tests/test_plan_refsols/correl_7.txt | 4 +- .../epoch_culture_events_info.txt | 6 +-- .../month_year_sliding_windows.txt | 15 +++--- .../multi_partition_access_6.txt | 53 +++++++++---------- tests/test_plan_refsols/singular_anti.txt | 4 +- .../technograph_monthly_incident_rate.txt | 10 ++-- tests/test_plan_refsols/tpch_q18.txt | 10 ++-- tests/test_plan_refsols/tpch_q22.txt | 6 +-- .../window_filter_order_10.txt | 4 +- ...technograph_monthly_incident_rate_ansi.sql | 26 ++++----- ...chnograph_monthly_incident_rate_sqlite.sql | 26 ++++----- tests/test_sql_refsols/tpch_q18_ansi.sql | 12 ++--- tests/test_sql_refsols/tpch_q18_sqlite.sql | 12 ++--- 24 files changed, 138 insertions(+), 159 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 88bc2d6ef..460a7e5fb 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -10,6 +10,7 @@ from pydough.relational import ( CallExpression, ColumnReference, + CorrelatedReference, Filter, Join, JoinType, @@ -18,6 +19,7 @@ RelationalExpression, RelationalNode, RelationalRoot, + WindowCallExpression, ) from pydough.relational.rel_util import apply_substitution, contains_window from pydough.relational.relational_expressions.column_reference_finder import ( @@ -38,14 +40,14 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: for name, expr in node.columns.items(): new_node_columns[name] = expr match expr: - case ColumnReference(): + case ColumnReference() | CorrelatedReference(): new_project_columns[name] = ColumnReference(name, expr.data_type) - case LiteralExpression() | CallExpression(): + case LiteralExpression() | CallExpression() | WindowCallExpression(): new_project_columns[name] = expr needs_pull = True case _: 
raise NotImplementedError( - f"Unsupported expression type {expr.__class__.__name__} in join columns." + f"Unsupported expression type {expr.__class__.__name__} in `pull_non_columns` columns." ) if not needs_pull: @@ -112,14 +114,15 @@ def pull_project_into_filter(node: Filter) -> None: new_expr: RelationalExpression = apply_substitution( expr, transfer_substitutions, {} ) - if not (cond_contains_window and contains_window(new_expr)): + expr_contains_window: bool = contains_window(new_expr) + if not (cond_contains_window and expr_contains_window): if name in condition_names: if ref_expr not in existing_outputs: substitutions[ref_expr] = new_expr - else: + elif not expr_contains_window: new_filter_columns[name] = new_expr node._condition = apply_substitution(node.condition, substitutions, {}) - node._columns = new_filter_columns + # node._columns = new_filter_columns def pullup_projections(node: RelationalNode) -> RelationalNode: @@ -137,7 +140,7 @@ def pullup_projections(node: RelationalNode) -> RelationalNode: pull_project_into_join(node, 1) return pull_non_columns(node) case Filter(): - # pull_project_into_filter(node) + pull_project_into_filter(node) return pull_non_columns(node) case _: return node diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index b2f0372a4..7c039253d 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -83,6 +83,7 @@ from .hybrid_translator import HybridTranslator from .hybrid_tree import HybridTree from .merge_projects import merge_projects +from .projection_pullup import pullup_projections @dataclass @@ -1433,7 +1434,7 @@ def optimize_relational_tree( # Step 5: re-run projection merging. root = confirm_root(merge_projects(root)) - # Step 6: prune unused columns. + # Step 6: re-run column pruning. 
root = ColumnPruner().prune_unused_columns(root) # Step 7: bubble up names from the leaf nodes to further encourage simpler @@ -1441,24 +1442,9 @@ def optimize_relational_tree( # possible. root = bubble_column_names(root) - # Step 8: re-run column pruning. - root = ColumnPruner().prune_unused_columns(root) - - # Step 9: re-run projection merging. - root = confirm_root(merge_projects(root)) - - """ - # Step 6: bubble up names from the leaf nodes to further encourage simpler - # naming without aliases, and also to delete duplicate columns where - # possible. - root = bubble_column_names(root) - - # Step 7: run projection pullup. + # Step 8: run projection pullup. root = confirm_root(pullup_projections(root)) - # Step 8: prune unused columns. - root = ColumnPruner().prune_unused_columns(root) - # Step 9: re-run filter pushdown root._input = push_filters(root.input, set()) @@ -1467,7 +1453,6 @@ def optimize_relational_tree( # Step 11: re-run column pruning. root = ColumnPruner().prune_unused_columns(root) - """ return root diff --git a/tests/test_plan_refsols/aggregate_anti.txt b/tests/test_plan_refsols/aggregate_anti.txt index b5bba6374..e1482682d 100644 --- a/tests/test_plan_refsols/aggregate_anti.txt +++ b/tests/test_plan_refsols/aggregate_anti.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(NULL_2, 0:numeric)), ('avg_price_of_10parts', NULL_2), ('sum_price_of_10parts', DEFAULT_TO(NULL_2, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_2': None:unknown, 's_name': t0.s_name}) +ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 
's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate.txt b/tests/test_plan_refsols/anti_aggregate.txt index b5bba6374..e1482682d 100644 --- a/tests/test_plan_refsols/anti_aggregate.txt +++ b/tests/test_plan_refsols/anti_aggregate.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(NULL_2, 0:numeric)), ('avg_price_of_10parts', NULL_2), ('sum_price_of_10parts', DEFAULT_TO(NULL_2, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_2': None:unknown, 's_name': t0.s_name}) +ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate_alternate.txt b/tests/test_plan_refsols/anti_aggregate_alternate.txt index 4e01b49b6..af1852c80 100644 --- a/tests/test_plan_refsols/anti_aggregate_alternate.txt +++ b/tests/test_plan_refsols/anti_aggregate_alternate.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(NULL_2, 0:numeric)), ('avg_price_of_10parts', DEFAULT_TO(NULL_2, 0:numeric)), ('sum_price_of_10parts', NULL_2)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, 
columns={'NULL_2': None:unknown, 's_name': t0.s_name}) +ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('sum_price_of_10parts', None:unknown)], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_singular.txt b/tests/test_plan_refsols/anti_singular.txt index b432cef13..a66ce09a4 100644 --- a/tests/test_plan_refsols/anti_singular.txt +++ b/tests/test_plan_refsols/anti_singular.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', n_name), ('region_name', NULL_1)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_1': None:unknown, 'n_name': t0.n_name}) +ROOT(columns=[('name', n_name), ('region_name', None:unknown)], orderings=[]) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 31d071505..00ebd7afd 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -4,13 +4,13 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_ SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 
's_suppkey': s_suppkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'qty_shipped': t1.qty_shipped}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - PROJECT(columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + PROJECT(columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 905b6bb93..694228447 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_orders', n_machine_high_orders), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines)], orderings=[(anything_n_name):asc_first]) PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) - FILTER(condition=sum_sum_n_rows > 0:numeric & sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, 
columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/correl_10.txt b/tests/test_plan_refsols/correl_10.txt index 217a786c0..a2954ac48 100644 --- a/tests/test_plan_refsols/correl_10.txt +++ b/tests/test_plan_refsols/correl_10.txt @@ -1,6 +1,7 @@ ROOT(columns=[('name', n_name), ('rname', NULL_4)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_4': None:unknown, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'NULL_4': None:unknown, 'n_name': n_name}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=SLICE(t1.r_name, 
None:unknown, 1:numeric, None:unknown) == SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index 8c61c789f..a480ec3e3 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,16 +1,14 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=domestic, columns={}) - PROJECT(columns={'domestic': name_16 == n_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'name_16': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - 
SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index 9a051c870..4ee636ab6 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ b/tests/test_plan_refsols/correl_34.txt @@ -13,7 +13,7 @@ ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 
'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=l_returnflag == 'N':string & l_linestatus == 'F':string, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + FILTER(condition=l_linestatus == 'F':string & l_returnflag == 'N':string, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_linestatus': l_linestatus, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_returnflag': l_returnflag, 'l_suppkey': l_suppkey}) FILTER(condition=YEAR(o_orderdate) >= 1995:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_7.txt b/tests/test_plan_refsols/correl_7.txt index 1a0fbcdc7..6ba1a011d 100644 --- a/tests/test_plan_refsols/correl_7.txt +++ b/tests/test_plan_refsols/correl_7.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', r_name), ('n_prefix_nations', DEFAULT_TO(NULL_4, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_4': None:unknown, 'r_name': t0.r_name}) +ROOT(columns=[('name', r_name), ('n_prefix_nations', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) JOIN(condition=SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & 
t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index 2b98c54a1..be5415490 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -2,11 +2,11 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', eve LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': event_year, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 'event_year': t0.event_year, 's_name': t0.s_name, 't_name': t1.t_name}) JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 'event_year': t0.event_year, 's_name': t1.s_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 'event_year': t0.event_year}) - PROJECT(columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'event_year': YEAR(ev_dt)}) + PROJECT(columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'event_year': YEAR(ev_dt)}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': 
ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) + SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index b4da40d64..c9e01e26a 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,14 +1,13 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=month_total_spent > NEXT(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & month_total_spent > PREV(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'month_total_spent': t1.month_total_spent, 'year': t1.year}) - FILTER(condition=curr_year_total_spent > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'curr_year_total_spent': DEFAULT_TO(sum_month_total_spent, 0:numeric), 'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'year': year}) - AGGREGATE(keys={'year': year}, 
aggregations={'sum_month_total_spent': SUM(month_total_spent)}) - PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), columns={'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) + PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) PROJECT(columns={'month': month, 'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 05b282309..5ee7193e5 100644 --- 
a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -2,26 +2,25 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=n_ticker_type_trans == 1:numeric | n_cust_type_trans == 1:numeric, columns={'sbTxId': sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t0.n_cust_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_cust_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) - PROJECT(columns={'n_cust_trans': DEFAULT_TO(sum_n_cust_type_trans, 0:numeric), 'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, 
aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + FILTER(condition=DEFAULT_TO(sum_n_cust_type_trans, 0:numeric) > 1:numeric, columns={'sbTxCustId': sbTxCustId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + 
SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) @@ -41,21 +40,19 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) - PROJECT(columns={'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - 
AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_ticker_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + 
FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/singular_anti.txt b/tests/test_plan_refsols/singular_anti.txt index 8bc55d80a..de7e0a463 100644 --- a/tests/test_plan_refsols/singular_anti.txt +++ b/tests/test_plan_refsols/singular_anti.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('nation_name', n_name), ('region_name', NULL_1)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_1': None:unknown, 'n_name': t0.n_name}) +ROOT(columns=[('nation_name', n_name), ('region_name', None:unknown)], orderings=[]) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index b73085185..5912c5390 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ 
b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -10,9 +10,8 @@ ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) @@ -20,9 +19,8 @@ ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) 
+ SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index c55be44f7..fc34417b8 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,10 +1,10 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'TOTAL_QUANTITY': t1.TOTAL_QUANTITY, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': 
o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'TOTAL_QUANTITY': t1.TOTAL_QUANTITY, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'l_orderkey': l_orderkey}) PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'l_orderkey': l_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 6195aa0a0..06077eb2c 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -8,10 +8,8 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) - 
FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) - PROJECT(columns={'c_acctbal': c_acctbal, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) - FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index 997dfa722..28100ecde 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[NULL_1], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_1': None:unknown, 'o_totalprice': t0.o_totalprice}) + FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': 
o_custkey, 'o_totalprice': o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index f824db756..3aca1a585 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -4,7 +4,7 @@ WITH _t4 AS ( FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 2021) -), _t8 AS ( +), _t6 AS ( SELECT co_id, co_name @@ -14,29 +14,29 @@ WITH _t4 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t7.ca_dt - FROM _t4 AS _t7 + _s0.ca_dt + FROM _t4 AS _s0 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATE_ADD(CAST(_t7.ca_dt AS TIMESTAMP), -6, 'MONTH') + ON calendar.ca_dt >= DATE_ADD(CAST(_s0.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t8 AS _t8 - ON _t8.co_id = devices.de_production_country_id + JOIN _t6 AS _t6 + ON _t6.co_id = devices.de_production_country_id GROUP BY - _t7.ca_dt + _s0.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t11.ca_dt - FROM _t4 AS _t11 + _s8.ca_dt + FROM _t4 AS _s8 JOIN main.incidents AS incidents - ON _t11.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t12 - ON _t12.co_id = devices.de_production_country_id + JOIN _t6 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _t11.ca_dt + _s8.ca_dt ) SELECT CONCAT_WS( diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index 4d1842b88..b9ae32845 100644 --- 
a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -4,7 +4,7 @@ WITH _t4 AS ( FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t8 AS ( +), _t6 AS ( SELECT co_id, co_name @@ -14,29 +14,29 @@ WITH _t4 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t7.ca_dt - FROM _t4 AS _t7 + _s0.ca_dt + FROM _t4 AS _s0 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATETIME(_t7.ca_dt, '-6 month') + ON calendar.ca_dt >= DATETIME(_s0.ca_dt, '-6 month') JOIN main.devices AS devices ON calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t8 AS _t8 - ON _t8.co_id = devices.de_production_country_id + JOIN _t6 AS _t6 + ON _t6.co_id = devices.de_production_country_id GROUP BY - _t7.ca_dt + _s0.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t11.ca_dt - FROM _t4 AS _t11 + _s8.ca_dt + FROM _t4 AS _s8 JOIN main.incidents AS incidents - ON _t11.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') + ON _s8.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t12 - ON _t12.co_id = devices.de_production_country_id + JOIN _t6 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _t11.ca_dt + _s8.ca_dt ) SELECT CONCAT_WS( diff --git a/tests/test_sql_refsols/tpch_q18_ansi.sql b/tests/test_sql_refsols/tpch_q18_ansi.sql index c75c107e0..aa9134752 100644 --- a/tests/test_sql_refsols/tpch_q18_ansi.sql +++ b/tests/test_sql_refsols/tpch_q18_ansi.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS O_TOTALPRICE, - COALESCE(_t0.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS customer ON 
customer.c_custkey = orders.o_custkey -JOIN _t0 AS _t0 - ON NOT _t0.sum_l_quantity IS NULL - AND _t0.l_orderkey = orders.o_orderkey - AND _t0.sum_l_quantity > 300 +JOIN _t1 AS _t1 + ON NOT _t1.sum_l_quantity IS NULL + AND _t1.l_orderkey = orders.o_orderkey + AND _t1.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate diff --git a/tests/test_sql_refsols/tpch_q18_sqlite.sql b/tests/test_sql_refsols/tpch_q18_sqlite.sql index c75c107e0..aa9134752 100644 --- a/tests/test_sql_refsols/tpch_q18_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q18_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS O_TOTALPRICE, - COALESCE(_t0.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS customer ON customer.c_custkey = orders.o_custkey -JOIN _t0 AS _t0 - ON NOT _t0.sum_l_quantity IS NULL - AND _t0.l_orderkey = orders.o_orderkey - AND _t0.sum_l_quantity > 300 +JOIN _t1 AS _t1 + ON NOT _t1.sum_l_quantity IS NULL + AND _t1.l_orderkey = orders.o_orderkey + AND _t1.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate From cc004ecbb4bfe17f1736733f6dfafb8a088c4dec Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 9 Jul 2025 14:50:34 -0400 Subject: [PATCH 007/143] Fixing filter/join cases --- pydough/conversion/projection_pullup.py | 66 +++++++++++-------- tests/test_plan_refsols/bad_child_reuse_2.txt | 17 ++--- tests/test_plan_refsols/bad_child_reuse_3.txt | 17 ++--- tests/test_plan_refsols/common_prefix_n.txt | 49 +++++++------- tests/test_plan_refsols/common_prefix_o.txt | 53 +++++++-------- tests/test_plan_refsols/correl_24.txt | 14 ++-- .../month_year_sliding_windows.txt | 15 +++-- .../technograph_monthly_incident_rate.txt | 7 +- ..._year_cumulative_incident_rate_overall.txt | 27 ++++---- 
...technograph_monthly_incident_rate_ansi.sql | 34 +++++----- ...chnograph_monthly_incident_rate_sqlite.sql | 34 +++++----- ..._cumulative_incident_rate_overall_ansi.sql | 20 +++--- ...umulative_incident_rate_overall_sqlite.sql | 20 +++--- 13 files changed, 198 insertions(+), 175 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 460a7e5fb..f211f4d0d 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -8,18 +8,14 @@ from pydough.relational import ( - CallExpression, ColumnReference, - CorrelatedReference, Filter, Join, JoinType, - LiteralExpression, Project, RelationalExpression, RelationalNode, RelationalRoot, - WindowCallExpression, ) from pydough.relational.rel_util import apply_substitution, contains_window from pydough.relational.relational_expressions.column_reference_finder import ( @@ -39,20 +35,35 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: for name, expr in node.columns.items(): new_node_columns[name] = expr - match expr: - case ColumnReference() | CorrelatedReference(): - new_project_columns[name] = ColumnReference(name, expr.data_type) - case LiteralExpression() | CallExpression() | WindowCallExpression(): - new_project_columns[name] = expr - needs_pull = True - case _: - raise NotImplementedError( - f"Unsupported expression type {expr.__class__.__name__} in `pull_non_columns` columns." 
- ) + if isinstance(expr, ColumnReference): + new_project_columns[name] = ColumnReference(name, expr.data_type) + else: + new_project_columns[name] = expr + needs_pull = True if not needs_pull: return node + existing_vals: set[RelationalExpression] = set(new_node_columns.values()) + substitutions: dict[RelationalExpression, RelationalExpression] = {} + for input_idx in range(len(node.inputs)): + input_node: RelationalNode = node.inputs[input_idx] + for name, expr in input_node.columns.items(): + ref_expr: ColumnReference = ColumnReference( + name, expr.data_type, input_name=node.default_input_aliases[input_idx] + ) + if expr not in existing_vals: + new_name: str = name + idx: int = 0 + while new_name in new_node_columns: + idx += 1 + new_name = f"{name}_{idx}" + new_ref: ColumnReference = ColumnReference(new_name, expr.data_type) + new_node_columns[new_name] = ref_expr + substitutions[ref_expr] = new_ref + for name, expr in new_project_columns.items(): + new_project_columns[name] = apply_substitution(expr, substitutions, {}) + new_input: RelationalNode = node.copy(columns=new_node_columns) return Project(input=new_input, columns=new_project_columns) @@ -79,6 +90,11 @@ def pull_project_into_filter(node: Filter) -> None: node.condition.accept(finder) condition_cols: set[ColumnReference] = finder.get_column_references() condition_names: set[str] = {col.name for col in condition_cols} + finder.reset() + for expr in node.columns.values(): + expr.accept(finder) + output_cols: set[ColumnReference] = finder.get_column_references() + output_names: set[str] = {col.name for col in output_cols} ref_expr: ColumnReference new_ref: ColumnReference @@ -104,25 +120,23 @@ def pull_project_into_filter(node: Filter) -> None: node._input = project.copy(columns=new_project_columns) - cond_contains_window: bool = contains_window(node.condition) substitutions: dict[RelationalExpression, RelationalExpression] = {} - existing_outputs: set[RelationalExpression] = set(node.columns.values()) - 
new_filter_columns: dict[str, RelationalExpression] = {} for name, expr in project.columns.items(): ref_expr = ColumnReference(name, expr.data_type) - new_filter_columns[name] = expr new_expr: RelationalExpression = apply_substitution( expr, transfer_substitutions, {} ) - expr_contains_window: bool = contains_window(new_expr) - if not (cond_contains_window and expr_contains_window): - if name in condition_names: - if ref_expr not in existing_outputs: - substitutions[ref_expr] = new_expr - elif not expr_contains_window: - new_filter_columns[name] = new_expr + if (not contains_window(new_expr)) and ( + (name in condition_names) != (name in output_names) + ): + substitutions[ref_expr] = apply_substitution( + expr, transfer_substitutions, {} + ) node._condition = apply_substitution(node.condition, substitutions, {}) - # node._columns = new_filter_columns + node._columns = { + name: apply_substitution(expr, substitutions, {}) + for name, expr in node.columns.items() + } def pullup_projections(node: RelationalNode) -> RelationalNode: diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 128fe3cf1..1f5ca48a2 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,10 +1,11 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 128fe3cf1..1f5ca48a2 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ 
b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,10 +1,11 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 
'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 71e91f37d..01c51b8e2 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,26 +1,27 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) + 
PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + 
FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 907f12e2a..0e7f831a2 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,28 +1,29 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', 
n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - 
SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, 
columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': 
t0.sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index af102dee8..1217865d6 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -4,9 +4,11 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orde JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': 
avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) - FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) + FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) + FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index c9e01e26a..a0098dd14 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,13 
+1,14 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=month_total_spent > NEXT(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & month_total_spent > PREV(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'month_total_spent': t1.month_total_spent, 'year': t1.year}) - FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), columns={'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) - PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) + PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) + PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) + AGGREGATE(keys={'month': month, 'year': 
year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) PROJECT(columns={'month': month, 'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 5912c5390..ab43e7a9a 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -3,9 +3,10 @@ ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'month': month, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': year}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 
'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index b34bf2502..e333d030c 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,17 +1,18 @@ ROOT(columns=[('yr', year), ('cum_ir', cum_ir), ('pct_bought_change', pct_bought_change), ('pct_incident_change', pct_incident_change), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(year):asc_first]) PROJECT(columns={'cum_ir': ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric), 'n_devices': n_devices, 'n_incidents': n_incidents, 'pct_bought_change': ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric), 'pct_incident_change': ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric), 'year': year}) - FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'n_incidents': n_incidents, 'year': year}) - PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 
0:numeric), 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + PROJECT(columns={'n_devices': n_devices, 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) + FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) + PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + 
JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index 3aca1a585..593db1ec1 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -1,10 +1,10 @@ -WITH _t4 AS ( +WITH _t5 AS ( SELECT ca_dt FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 2021) -), _t6 AS ( +), _t7 AS ( SELECT co_id, co_name @@ -15,37 +15,37 @@ WITH _t4 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t4 AS _s0 + FROM _t5 AS _s0 JOIN main.calendar AS calendar ON calendar.ca_dt >= DATE_ADD(CAST(_s0.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t6 AS _t6 - ON _t6.co_id = devices.de_production_country_id + JOIN _t7 AS _t7 + ON _t7.co_id = devices.de_production_country_id GROUP BY _s0.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, _s8.ca_dt - FROM _t4 AS _s8 + FROM _t5 AS _s8 JOIN main.incidents AS incidents ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t6 AS _t8 - 
ON _t8.co_id = devices.de_production_country_id + JOIN _t7 AS _t9 + ON _t9.co_id = devices.de_production_country_id GROUP BY _s8.ca_dt ) SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))), ( + WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME))), ( 2 * -1 )) END @@ -53,13 +53,13 @@ SELECT ROUND(( 1000000.0 * COALESCE(SUM(_s15.n_rows), 0) ) / COALESCE(SUM(_s7.n_rows), 0), 2) AS ir -FROM _t4 AS _t4 +FROM _t5 AS _t5 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t4.ca_dt + ON _s7.ca_dt = _t5.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t4.ca_dt + ON _s15.ca_dt = _t5.ca_dt GROUP BY - EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), - EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)) + EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index b9ae32845..581509700 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -1,10 +1,10 @@ -WITH _t4 AS ( +WITH _t5 AS ( SELECT ca_dt FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t6 AS ( +), _t7 AS ( SELECT co_id, co_name @@ -15,37 +15,37 @@ WITH _t4 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t4 AS _s0 + FROM _t5 AS _s0 JOIN main.calendar AS calendar ON calendar.ca_dt >= DATETIME(_s0.ca_dt, '-6 month') JOIN main.devices AS devices ON 
calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t6 AS _t6 - ON _t6.co_id = devices.de_production_country_id + JOIN _t7 AS _t7 + ON _t7.co_id = devices.de_production_country_id GROUP BY _s0.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, _s8.ca_dt - FROM _t4 AS _s8 + FROM _t5 AS _s8 JOIN main.incidents AS incidents ON _s8.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t6 AS _t8 - ON _t8.co_id = devices.de_production_country_id + JOIN _t7 AS _t9 + ON _t9.co_id = devices.de_production_country_id GROUP BY _s8.ca_dt ) SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), ( 2 * -1 )) END @@ -56,13 +56,13 @@ SELECT ) AS REAL) / COALESCE(SUM(_s7.n_rows), 0), 2 ) AS ir -FROM _t4 AS _t4 +FROM _t5 AS _t5 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t4.ca_dt + ON _s7.ca_dt = _t5.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t4.ca_dt + ON _s15.ca_dt = _t5.ca_dt GROUP BY - CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), - CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER) + CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql index 85a844a7c..a296d1e1b 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql +++ 
b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t6 AS ( SELECT ca_dt FROM main.calendar @@ -6,7 +6,7 @@ WITH _t5 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t5 AS _s0 + FROM _t6 AS _s0 JOIN main.devices AS devices ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) GROUP BY @@ -15,23 +15,23 @@ WITH _t5 AS ( SELECT COUNT(*) AS n_rows, _s4.ca_dt - FROM _t5 AS _s4 + FROM _t6 AS _s4 JOIN main.incidents AS incidents ON _s4.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) GROUP BY _s4.ca_dt -), _t3 AS ( +), _t4 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, - EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)) AS year - FROM _t5 AS _t5 + EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) AS year + FROM _t6 AS _t6 LEFT JOIN _s3 AS _s3 - ON _s3.ca_dt = _t5.ca_dt + ON _s3.ca_dt = _t6.ca_dt LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t5.ca_dt + ON _s7.ca_dt = _t6.ca_dt GROUP BY - EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)) + EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) ), _t0 AS ( SELECT ROUND( @@ -57,7 +57,7 @@ WITH _t5 AS ( COALESCE(sum_expr_3, 0) AS n_devices, COALESCE(sum_n_rows, 0) AS n_incidents, year - FROM _t3 + FROM _t4 WHERE NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ) diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql index 8003d941c..783a358a4 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t6 AS ( SELECT ca_dt FROM main.calendar @@ -6,7 +6,7 @@ WITH _t5 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t5 AS _s0 + FROM _t6 AS _s0 JOIN main.devices AS devices ON _s0.ca_dt = DATE(devices.de_purchase_ts, 'start of 
day') GROUP BY @@ -15,23 +15,23 @@ WITH _t5 AS ( SELECT COUNT(*) AS n_rows, _s4.ca_dt - FROM _t5 AS _s4 + FROM _t6 AS _s4 JOIN main.incidents AS incidents ON _s4.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') GROUP BY _s4.ca_dt -), _t3 AS ( +), _t4 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, - CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER) AS year - FROM _t5 AS _t5 + CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) AS year + FROM _t6 AS _t6 LEFT JOIN _s3 AS _s3 - ON _s3.ca_dt = _t5.ca_dt + ON _s3.ca_dt = _t6.ca_dt LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t5.ca_dt + ON _s7.ca_dt = _t6.ca_dt GROUP BY - CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER) + CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) ), _t0 AS ( SELECT ROUND( @@ -57,7 +57,7 @@ WITH _t5 AS ( COALESCE(sum_expr_3, 0) AS n_devices, COALESCE(sum_n_rows, 0) AS n_incidents, year - FROM _t3 + FROM _t4 WHERE NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ) From de7d4e66f0b0fafbdfe34ffbeaac4ae9b548f213 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 9 Jul 2025 15:53:56 -0400 Subject: [PATCH 008/143] Bugfixes, testing for correctness [RUN CI] --- pydough/conversion/projection_pullup.py | 80 ++++++++----------- pydough/relational/rel_util.py | 2 + tests/test_plan_refsols/bad_child_reuse_2.txt | 6 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 6 +- tests/test_plan_refsols/common_prefix_al.txt | 4 +- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_o.txt | 10 +-- tests/test_plan_refsols/common_prefix_s.txt | 4 +- tests/test_plan_refsols/nation_best_order.txt | 4 +- .../time_threshold_reached.txt | 4 +- .../window_filter_order_10.txt | 13 +-- .../time_threshold_reached_ansi.sql | 6 +- .../time_threshold_reached_sqlite.sql | 6 +- 13 files changed, 70 insertions(+), 79 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index f211f4d0d..07fd199d8 100644 --- a/pydough/conversion/projection_pullup.py 
+++ b/pydough/conversion/projection_pullup.py @@ -25,26 +25,13 @@ from .merge_projects import merge_adjacent_projects -def pull_non_columns(node: RelationalNode) -> RelationalNode: +def widen_columns( + node: RelationalNode, +) -> dict[RelationalExpression, RelationalExpression]: """ TODO """ - new_node_columns: dict[str, RelationalExpression] = {} - new_project_columns: dict[str, RelationalExpression] = {} - needs_pull: bool = False - - for name, expr in node.columns.items(): - new_node_columns[name] = expr - if isinstance(expr, ColumnReference): - new_project_columns[name] = ColumnReference(name, expr.data_type) - else: - new_project_columns[name] = expr - needs_pull = True - - if not needs_pull: - return node - - existing_vals: set[RelationalExpression] = set(new_node_columns.values()) + existing_vals: set[RelationalExpression] = set(node.columns.values()) substitutions: dict[RelationalExpression, RelationalExpression] = {} for input_idx in range(len(node.inputs)): input_node: RelationalNode = node.inputs[input_idx] @@ -55,17 +42,39 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: if expr not in existing_vals: new_name: str = name idx: int = 0 - while new_name in new_node_columns: + while new_name in node.columns: idx += 1 new_name = f"{name}_{idx}" new_ref: ColumnReference = ColumnReference(new_name, expr.data_type) - new_node_columns[new_name] = ref_expr + node.columns[new_name] = ref_expr substitutions[ref_expr] = new_ref + return substitutions + + +def pull_non_columns(node: RelationalNode) -> RelationalNode: + """ + TODO + """ + new_project_columns: dict[str, RelationalExpression] = {} + needs_pull: bool = False + + for name, expr in node.columns.items(): + if isinstance(expr, ColumnReference): + new_project_columns[name] = ColumnReference(name, expr.data_type) + else: + new_project_columns[name] = expr + needs_pull = True + + if not needs_pull: + return node + + substitutions: dict[RelationalExpression, RelationalExpression] = 
widen_columns( + node + ) for name, expr in new_project_columns.items(): new_project_columns[name] = apply_substitution(expr, substitutions, {}) - new_input: RelationalNode = node.copy(columns=new_node_columns) - return Project(input=new_input, columns=new_project_columns) + return Project(input=node, columns=new_project_columns) def pull_project_into_join(node: Join, input_index: int) -> None: @@ -96,39 +105,18 @@ def pull_project_into_filter(node: Filter) -> None: output_cols: set[ColumnReference] = finder.get_column_references() output_names: set[str] = {col.name for col in output_cols} - ref_expr: ColumnReference - new_ref: ColumnReference - new_project_columns: dict[str, RelationalExpression] = {} - used_cols: set[RelationalExpression] = set() - transfer_substitutions: dict[RelationalExpression, RelationalExpression] = {} - for name, expr in project.columns.items(): - new_project_columns[name] = expr - used_cols.add(expr) - for name, expr in project.input.columns.items(): - ref_expr = ColumnReference(name, expr.data_type) - if name in condition_names: - continue - if ref_expr not in used_cols: - new_name: str = name - idx: int = 0 - while new_name in new_project_columns: - idx += 1 - new_name = f"{name}_{idx}" - new_ref = ColumnReference(name, expr.data_type) - new_project_columns[new_name] = new_ref - transfer_substitutions[ref_expr] = new_ref - - node._input = project.copy(columns=new_project_columns) - + transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( + widen_columns(project) + ) substitutions: dict[RelationalExpression, RelationalExpression] = {} for name, expr in project.columns.items(): - ref_expr = ColumnReference(name, expr.data_type) new_expr: RelationalExpression = apply_substitution( expr, transfer_substitutions, {} ) if (not contains_window(new_expr)) and ( (name in condition_names) != (name in output_names) ): + ref_expr: ColumnReference = ColumnReference(name, expr.data_type) substitutions[ref_expr] = 
apply_substitution( expr, transfer_substitutions, {} ) diff --git a/pydough/relational/rel_util.py b/pydough/relational/rel_util.py index c0423f91e..34f485325 100644 --- a/pydough/relational/rel_util.py +++ b/pydough/relational/rel_util.py @@ -306,6 +306,8 @@ def build_filter( assert isinstance(new_join, Join) new_join.condition = condition new_join.cardinality = new_join.cardinality.add_potential_filter() + if columns is not None: + return Project(new_join, columns) return new_join # Otherwise, just return a new filter node with the new condition on top diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 1f5ca48a2..ef6c8f5ca 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows_1, 0:numeric)}) + FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows_1': n_rows_1}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows_1': n_rows}) JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 1f5ca48a2..ef6c8f5ca 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows_1, 0:numeric)}) + FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows_1': n_rows_1}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows_1': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': 
t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 8449c01ff..ee63e2821 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -2,8 +2,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discou JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_no_tax_discount': t0.n_no_tax_discount, 'n_orders': t0.n_orders}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_no_tax_discount': n_no_tax_discount, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_no_tax_discount': t1.n_no_tax_discount, 'n_orders': t0.n_orders}) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 
'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 23c6a811a..126aa69c8 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 0e7f831a2..642465175 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,13 +1,13 @@ ROOT(columns=[('key', 
o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 
'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 
'sum_sum_p_retailprice': sum_sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 9d8b97da5..dec2085d2 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', most_recent_order_total), ('most_recent_order_distinct', most_recent_order_distinct)], orderings=[(c_name):asc_first]) - FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate}) - PROJECT(columns={'c_name': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate': o_orderdate}) + FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name_1, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate_1}) + PROJECT(columns={'c_name_1': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate_1': 
o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index caee91153..738a980a1 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -4,8 +4,8 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) - PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name_1, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) + PROJECT(columns={'c_name_1': c_name, 'c_nationkey': c_nationkey, 'o_orderkey_1': 
o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/time_threshold_reached.txt b/tests/test_plan_refsols/time_threshold_reached.txt index 8b94ac8f2..e83bd7ade 100644 --- a/tests/test_plan_refsols/time_threshold_reached.txt +++ b/tests/test_plan_refsols/time_threshold_reached.txt @@ -1,7 +1,7 @@ ROOT(columns=[('date_time', sbTxDateTime)], orderings=[(sbTxDateTime):asc_first]) FILTER(condition=RANKING(args=[], partition=[txn_day], order=[(pct_of_day):asc_last], allow_ties=False) == 1:numeric, columns={'sbTxDateTime': sbTxDateTime}) - FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) - PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[txn_day], order=[]), 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) + FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day_1}) + PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[txn_day], order=[]), 'sbTxDateTime': sbTxDateTime, 'txn_day_1': txn_day}) PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 
'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index 28100ecde..c436e164f 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,8 +1,9 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) - FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + FILTER(condition=o_totalprice_1 < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) + PROJECT(columns={'o_totalprice_1': o_totalprice}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) + FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': 
c_mktsegment}) diff --git a/tests/test_sql_refsols/time_threshold_reached_ansi.sql b/tests/test_sql_refsols/time_threshold_reached_ansi.sql index 853086170..08eebf4aa 100644 --- a/tests/test_sql_refsols/time_threshold_reached_ansi.sql +++ b/tests/test_sql_refsols/time_threshold_reached_ansi.sql @@ -3,8 +3,8 @@ WITH _t3 AS ( ( 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) / SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP))) AS pct_of_day, - sbtxdatetime, - DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) AS txn_day + DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) AS txn_day_1, + sbtxdatetime FROM main.sbtransaction WHERE EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) = 2023 @@ -15,7 +15,7 @@ WITH _t3 AS ( WHERE pct_of_day >= 50.0 QUALIFY - ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY pct_of_day NULLS LAST) = 1 + ROW_NUMBER() OVER (PARTITION BY txn_day_1 ORDER BY pct_of_day NULLS LAST) = 1 ) SELECT sbtxdatetime AS date_time diff --git a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql index 5e47efb24..c69bb3b85 100644 --- a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql +++ b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql @@ -3,15 +3,15 @@ WITH _t3 AS ( CAST(( 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE(sbtxdatetime, 'start of day') ORDER BY sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) AS REAL) / SUM(sbtxshares) OVER (PARTITION BY DATE(sbtxdatetime, 'start of day')) AS pct_of_day, - sbtxdatetime, - DATE(sbtxdatetime, 'start of day') AS txn_day + DATE(sbtxdatetime, 'start of day') AS txn_day_1, + sbtxdatetime FROM main.sbtransaction WHERE CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) = 2023 ), _t AS ( SELECT sbtxdatetime, - ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY 
pct_of_day) AS _w + ROW_NUMBER() OVER (PARTITION BY txn_day_1 ORDER BY pct_of_day) AS _w FROM _t3 WHERE pct_of_day >= 50.0 From 38272764d9c38974e4843d1a5a375cfe978a9b4f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sat, 12 Jul 2025 03:51:18 -0400 Subject: [PATCH 009/143] Finished dealing with JOIN pull-up --- pydough/conversion/projection_pullup.py | 61 +++++++++++++++++-- pydough/conversion/relational_converter.py | 4 ++ pydough/relational/rel_util.py | 49 +++++++++++++++ .../agg_orders_by_year_month_just_europe.txt | 16 ++--- .../agg_orders_by_year_month_vs_europe.txt | 16 ++--- tests/test_plan_refsols/aggregate_semi.txt | 6 +- .../aggregate_then_backref.txt | 11 ++-- .../aggregation_analytics_2.txt | 31 +++++----- .../aggregation_analytics_3.txt | 31 +++++----- .../test_plan_refsols/avg_acctbal_wo_debt.txt | 17 +++--- tests/test_plan_refsols/bad_child_reuse_2.txt | 6 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 6 +- tests/test_plan_refsols/common_prefix_a.txt | 8 +-- tests/test_plan_refsols/common_prefix_ad.txt | 12 ++-- tests/test_plan_refsols/common_prefix_al.txt | 20 +++--- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_b.txt | 10 +-- tests/test_plan_refsols/common_prefix_c.txt | 41 +++++++------ tests/test_plan_refsols/common_prefix_d.txt | 35 +++++------ tests/test_plan_refsols/common_prefix_e.txt | 8 +-- tests/test_plan_refsols/common_prefix_f.txt | 10 +-- tests/test_plan_refsols/common_prefix_g.txt | 10 +-- tests/test_plan_refsols/common_prefix_h.txt | 41 +++++++------ tests/test_plan_refsols/common_prefix_i.txt | 6 +- tests/test_plan_refsols/common_prefix_m.txt | 12 ++-- tests/test_plan_refsols/common_prefix_o.txt | 6 +- tests/test_plan_refsols/common_prefix_p.txt | 9 ++- tests/test_plan_refsols/common_prefix_r.txt | 6 +- tests/test_plan_refsols/common_prefix_s.txt | 4 +- tests/test_plan_refsols/common_prefix_t.txt | 14 ++--- tests/test_plan_refsols/common_prefix_u.txt | 14 ++--- 
tests/test_plan_refsols/common_prefix_x.txt | 6 +- tests/test_plan_refsols/correl_14.txt | 17 +++--- tests/test_plan_refsols/correl_15.txt | 17 +++--- tests/test_plan_refsols/correl_17.txt | 7 +-- tests/test_plan_refsols/correl_18.txt | 13 ++-- tests/test_plan_refsols/correl_20.txt | 15 ++--- tests/test_plan_refsols/correl_24.txt | 4 +- tests/test_plan_refsols/correl_26.txt | 8 +-- tests/test_plan_refsols/correl_27.txt | 8 +-- tests/test_plan_refsols/correl_28.txt | 8 +-- tests/test_plan_refsols/correl_29.txt | 11 ++-- tests/test_plan_refsols/correl_30.txt | 24 ++++---- tests/test_plan_refsols/correl_31.txt | 26 ++++---- .../count_cust_supplier_nation_combos.txt | 13 ++-- .../cumulative_stock_analysis.txt | 6 +- .../customer_largest_order_deltas.txt | 8 +-- .../customer_most_recent_orders.txt | 6 +- tests/test_plan_refsols/dumb_aggregation.txt | 8 +-- .../epoch_culture_events_info.txt | 16 ++--- .../test_plan_refsols/global_calc_backref.txt | 7 +-- tests/test_plan_refsols/hour_minute_day.txt | 8 +-- ...lineitems_access_cust_supplier_nations.txt | 9 ++- .../lines_shipping_vs_customer_region.txt | 11 ++-- .../month_year_sliding_windows.txt | 13 ++-- .../mostly_positive_accounts_per_nation3.txt | 8 +-- .../multi_partition_access_5.txt | 16 ++--- ...ple_simple_aggregations_multiple_calcs.txt | 17 +++--- ...ltiple_simple_aggregations_single_calc.txt | 16 +++-- tests/test_plan_refsols/nation_best_order.txt | 4 +- .../num_positive_accounts_per_nation.txt | 15 +++-- .../orders_sum_line_price.txt | 9 ++- .../orders_sum_vs_count_line_price.txt | 9 ++- tests/test_plan_refsols/part_reduced_size.txt | 10 +-- .../region_acctbal_breakdown.txt | 6 +- .../test_plan_refsols/supplier_best_part.txt | 20 +++--- ...hnograph_incident_rate_by_release_year.txt | 18 +++--- .../technograph_monthly_incident_rate.txt | 42 ++++++------- .../technograph_most_unreliable_products.txt | 6 +- ...umulative_incident_rate_goldcopperstar.txt | 30 ++++----- ..._year_cumulative_incident_rate_overall.txt | 
18 +++--- tests/test_plan_refsols/tpch_q10.txt | 14 ++--- tests/test_plan_refsols/tpch_q11.txt | 31 +++++----- tests/test_plan_refsols/tpch_q12.txt | 8 +-- tests/test_plan_refsols/tpch_q15.txt | 33 +++++----- tests/test_plan_refsols/tpch_q18.txt | 12 ++-- tests/test_plan_refsols/tpch_q20.txt | 13 ++-- tests/test_plan_refsols/tpch_q3.txt | 14 ++--- tests/test_plan_refsols/tpch_q5.txt | 30 ++++----- .../various_aggfuncs_simple.txt | 6 +- .../window_filter_order_10.txt | 13 ++-- .../year_month_nation_orders.txt | 16 ++--- .../defog_broker_adv16_ansi.sql | 13 ++-- .../defog_broker_adv16_sqlite.sql | 13 ++-- .../defog_dealership_basic5_ansi.sql | 10 +-- .../defog_dealership_basic5_sqlite.sql | 10 +-- .../defog_ewallet_adv11_ansi.sql | 11 ++-- .../defog_ewallet_adv11_sqlite.sql | 21 +++---- ...aph_incident_rate_by_release_year_ansi.sql | 18 +++--- ...h_incident_rate_by_release_year_sqlite.sql | 18 +++--- ...hnograph_most_unreliable_products_ansi.sql | 5 +- ...ograph_most_unreliable_products_sqlite.sql | 5 +- ...tive_incident_rate_goldcopperstar_ansi.sql | 16 ++--- ...ve_incident_rate_goldcopperstar_sqlite.sql | 16 ++--- ..._cumulative_incident_rate_overall_ansi.sql | 16 ++--- ...umulative_incident_rate_overall_sqlite.sql | 16 ++--- tests/test_sql_refsols/tpch_q11_ansi.sql | 14 +++-- tests/test_sql_refsols/tpch_q11_sqlite.sql | 14 +++-- tests/test_sql_refsols/tpch_q15_ansi.sql | 24 ++++---- tests/test_sql_refsols/tpch_q15_sqlite.sql | 24 ++++---- tests/test_sql_refsols/tpch_q18_ansi.sql | 12 ++-- tests/test_sql_refsols/tpch_q18_sqlite.sql | 12 ++-- tests/test_sql_refsols/tpch_q20_ansi.sql | 6 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 6 +- 104 files changed, 807 insertions(+), 720 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 07fd199d8..cdfe2a553 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -17,7 +17,11 @@ RelationalNode, RelationalRoot, ) 
-from pydough.relational.rel_util import apply_substitution, contains_window +from pydough.relational.rel_util import ( + add_input_name, + apply_substitution, + contains_window, +) from pydough.relational.relational_expressions.column_reference_finder import ( ColumnReferenceFinder, ) @@ -31,11 +35,15 @@ def widen_columns( """ TODO """ - existing_vals: set[RelationalExpression] = set(node.columns.values()) + existing_vals: dict[RelationalExpression, RelationalExpression] = { + expr: ColumnReference(name, expr.data_type) + for name, expr in node.columns.items() + } substitutions: dict[RelationalExpression, RelationalExpression] = {} for input_idx in range(len(node.inputs)): input_node: RelationalNode = node.inputs[input_idx] for name, expr in input_node.columns.items(): + expr = add_input_name(expr, node.default_input_aliases[input_idx]) ref_expr: ColumnReference = ColumnReference( name, expr.data_type, input_name=node.default_input_aliases[input_idx] ) @@ -47,8 +55,11 @@ def widen_columns( new_name = f"{name}_{idx}" new_ref: ColumnReference = ColumnReference(new_name, expr.data_type) node.columns[new_name] = ref_expr + existing_vals[expr] = ref_expr substitutions[ref_expr] = new_ref - return substitutions + else: + substitutions[ref_expr] = existing_vals[expr] + return {k: v for k, v in substitutions.items() if k != v} def pull_non_columns(node: RelationalNode) -> RelationalNode: @@ -71,6 +82,7 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: substitutions: dict[RelationalExpression, RelationalExpression] = widen_columns( node ) + substitutions = {k: add_input_name(v, None) for k, v in substitutions.items()} for name, expr in new_project_columns.items(): new_project_columns[name] = apply_substitution(expr, substitutions, {}) @@ -84,6 +96,45 @@ def pull_project_into_join(node: Join, input_index: int) -> None: if not isinstance(node.inputs[input_index], Project): return + project = node.inputs[input_index] + assert isinstance(project, Project) + + 
input_name: str | None = node.default_input_aliases[input_index] + + finder: ColumnReferenceFinder = ColumnReferenceFinder() + finder.reset() + node.condition.accept(finder) + condition_cols: set[ColumnReference] = finder.get_column_references() + condition_names: set[str] = {col.name for col in condition_cols} + finder.reset() + for expr in node.columns.values(): + expr.accept(finder) + output_cols: set[ColumnReference] = finder.get_column_references() + output_names: set[str] = {col.name for col in output_cols} + + transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( + widen_columns(project) + ) + + substitutions: dict[RelationalExpression, RelationalExpression] = {} + for name, expr in project.columns.items(): + new_expr: RelationalExpression = add_input_name( + apply_substitution(expr, transfer_substitutions, {}), input_name + ) + if (not contains_window(new_expr)) and ( + (name in condition_names) != (name in output_names) + ): + ref_expr: ColumnReference = ColumnReference( + name, expr.data_type, input_name=input_name + ) + substitutions[ref_expr] = new_expr + + node._condition = apply_substitution(node.condition, substitutions, {}) + node._columns = { + name: apply_substitution(expr, substitutions, {}) + for name, expr in node.columns.items() + } + def pull_project_into_filter(node: Filter) -> None: """ @@ -117,9 +168,7 @@ def pull_project_into_filter(node: Filter) -> None: (name in condition_names) != (name in output_names) ): ref_expr: ColumnReference = ColumnReference(name, expr.data_type) - substitutions[ref_expr] = apply_substitution( - expr, transfer_substitutions, {} - ) + substitutions[ref_expr] = new_expr node._condition = apply_substitution(node.condition, substitutions, {}) node._columns = { name: apply_substitution(expr, substitutions, {}) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 7c039253d..8da531ed3 100644 --- a/pydough/conversion/relational_converter.py 
+++ b/pydough/conversion/relational_converter.py @@ -1443,7 +1443,11 @@ def optimize_relational_tree( root = bubble_column_names(root) # Step 8: run projection pullup. + # print() + # print(root.to_tree_string()) root = confirm_root(pullup_projections(root)) + # print() + # print(root.to_tree_string()) # Step 9: re-run filter pushdown root._input = push_filters(root.input, set()) diff --git a/pydough/relational/rel_util.py b/pydough/relational/rel_util.py index 34f485325..91e5919f9 100644 --- a/pydough/relational/rel_util.py +++ b/pydough/relational/rel_util.py @@ -4,6 +4,7 @@ __all__ = [ "add_expr_uses", + "add_input_name", "apply_substitution", "bubble_uniqueness", "build_filter", @@ -764,3 +765,51 @@ def apply_substitution( # For all other cases, just return the expression as is. return expr + + +def add_input_name( + expr: RelationalExpression, input_name: str | None +) -> RelationalExpression: + """ + Adds an input name to all column references inside the given expression. + + Args: + `expr`: The expression to add the input name to its contents. + `input_name`: The input name to add. + + Returns: + The expression with the input name added to all contents, if + applicable. + """ + if isinstance(expr, ColumnReference): + return expr.with_input(input_name) + + # For call expressions, recursively transform the inputs. + if isinstance(expr, CallExpression): + return CallExpression( + expr.op, + expr.data_type, + [add_input_name(arg, input_name) for arg in expr.inputs], + ) + + # For window call expressions, recursively transform the inputs, partition + # inputs, and order inputs. 
+ if isinstance(expr, WindowCallExpression): + return WindowCallExpression( + expr.op, + expr.data_type, + [add_input_name(arg, input_name) for arg in expr.inputs], + [add_input_name(arg, input_name) for arg in expr.partition_inputs], + [ + ExpressionSortInfo( + add_input_name(order_arg.expr, input_name), + order_arg.ascending, + order_arg.nulls_first, + ) + for order_arg in expr.order_inputs + ], + expr.kwargs, + ) + + # For all other cases, just return the expression as is. + return expr diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt index bd02f42ef..e4eb7a12f 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt @@ -4,12 +4,12 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', DEFAULT_ PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'year': t0.year}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_custkey': o_custkey, 'year': YEAR(o_orderdate)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt index 1d39cc66b..8279c92ac 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt @@ -4,12 +4,12 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', n_rows), PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'year': t0.year}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_custkey': o_custkey, 'year': YEAR(o_orderdate)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index f4f148cb8..f8dbb71a3 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('name', s_name), ('num_10parts', num_10parts), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', sum_price_of_10parts)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'num_10parts': t1.num_10parts, 's_name': t0.s_name, 'sum_price_of_10parts': 
t1.sum_price_of_10parts}) +ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(n_rows, 0:numeric)), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - PROJECT(columns={'avg_p_retailprice': avg_p_retailprice, 'num_10parts': DEFAULT_TO(n_rows, 0:numeric), 'ps_suppkey': ps_suppkey, 'sum_price_of_10parts': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) + PROJECT(columns={'avg_p_retailprice_1': avg_p_retailprice, 'n_rows': n_rows, 'ps_suppkey': ps_suppkey, 'sum_p_retailprice': sum_p_retailprice}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index 0f0986ae7..8fa56b648 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / total_quantity)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'total_quantity': t0.total_quantity}) - 
JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'total_quantity': t1.total_quantity}) +ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - PROJECT(columns={'l_orderkey': l_orderkey, 'total_quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index d0b6e3f6e..365ac3d0e 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,17 +1,18 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', revenue_generated)], orderings=[(revenue_generated):asc_first, (p_name):asc_first]) LIMIT(limit=Literal(value=4, type=NumericType()), columns={'p_name': p_name, 'revenue_generated': revenue_generated}, 
orderings=[(revenue_generated):asc_first, (p_name):asc_first]) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'revenue_generated': t0.revenue_generated}) - PROJECT(columns={'anything_ps_partkey': anything_ps_partkey, 'revenue_generated': ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': 
l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + PROJECT(columns={'p_name': p_name, 'revenue_generated': ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)}) + JOIN(condition=t0.anything_ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + PROJECT(columns={'anything_ps_partkey_1': anything_ps_partkey, 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + 
FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 061e03b43..82e33e815 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,17 +1,18 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', revenue_ratio)], orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) LIMIT(limit=Literal(value=3, type=NumericType()), columns={'p_name': p_name, 'revenue_ratio': revenue_ratio}, orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'revenue_ratio': t0.revenue_ratio}) - PROJECT(columns={'anything_ps_partkey': anything_ps_partkey, 'revenue_ratio': ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) - 
PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + PROJECT(columns={'p_name': p_name, 'revenue_ratio': ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / 
DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)}) + JOIN(condition=t0.anything_ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + PROJECT(columns={'anything_ps_partkey_1': anything_ps_partkey, 'sum_l_quantity': sum_l_quantity, 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + 
FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt index 949ff8a23..7bc4311aa 100644 --- a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt +++ b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('region_name', r_name), ('avg_bal_without_debt_erasure', avg_bal_without_debt_erasure)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_bal_without_debt_erasure': t1.avg_bal_without_debt_erasure, 'r_name': t0.r_name}) +ROOT(columns=[('region_name', r_name), ('avg_bal_without_debt_erasure', sum_sum_expr_1 / sum_count_expr_1)], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t0.r_name, 'sum_count_expr_1': t1.sum_count_expr_1, 'sum_sum_expr_1': t1.sum_sum_expr_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'avg_bal_without_debt_erasure': sum_sum_expr_1 / sum_count_expr_1, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_count_expr_1': SUM(count_expr_1), 'sum_sum_expr_1': SUM(sum_expr_1)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_expr_1': COUNT(expr_1), 'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'c_nationkey': c_nationkey, 'expr_1': LARGEST(c_acctbal, 0:numeric)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_count_expr_1': SUM(count_expr_1), 'sum_sum_expr_1': SUM(sum_expr_1)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_expr_1': COUNT(expr_1), 'sum_expr_1': SUM(expr_1)}) + PROJECT(columns={'c_nationkey': c_nationkey, 'expr_1': LARGEST(c_acctbal, 0:numeric)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index ef6c8f5ca..1f5ca48a2 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows_1, 0:numeric)}) - FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows_1': n_rows_1}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], 
partition=[c_nationkey], order=[]), 'n_rows_1': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index ef6c8f5ca..1f5ca48a2 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows_1, 0:numeric)}) - FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows_1': n_rows_1}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows_1': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': 
DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index e595a8632..94e733398 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -2,8 +2,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_cust JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations_0': t0.n_nations_0, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey}) + PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': 
t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 00ebd7afd..2e01acf9c 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,10 +1,10 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', qty_shipped)], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'qty_shipped': t1.qty_shipped, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + PROJECT(columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric), 's_name': s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index ee63e2821..11b514f75 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,16 +1,16 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_no_tax_discount)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_no_tax_discount': t0.n_no_tax_discount, 'n_orders': t0.n_orders}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_no_tax_discount': n_no_tax_discount, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_no_tax_discount': 
t1.n_no_tax_discount, 'n_orders': t0.n_orders}) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - PROJECT(columns={'n_no_tax_discount': DEFAULT_TO(n_rows, 0:numeric), 'o_custkey': o_custkey}) + PROJECT(columns={'c_custkey': c_custkey, 'n_no_tax_discount': DEFAULT_TO(n_rows, 0:numeric), 'n_orders': n_orders}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 126aa69c8..23c6a811a 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 76bd980c0..4d1bb2447 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -2,12 +2,12 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_cust JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0), 'n_suppliers': SUM(n_suppliers)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations_0': t0.n_nations_0, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': 
s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index b37c12250..34e0d2986 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,21 +1,22 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', n_orders), ('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_1, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': 
t1.sum_sum_n_rows}) - PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18_0': t0.expr_18_0, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - PROJECT(columns={'expr_18_0': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_nations': n_nations, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_suppliers': n_suppliers, 'r_name': r_name, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_sum_sum_expr_18_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index d379695ad..078ad6f20 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,26 +1,23 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', n_orders_94), ('n_orders_95', n_orders_95), ('n_orders_96', n_orders_96)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders_94': t1.n_orders_94, 'n_orders_95': t1.n_orders_95, 'n_orders_96': t1.n_orders_96, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_1, 'n_orders_94': DEFAULT_TO(sum_sum_expr_7, 0:numeric), 'n_orders_95': DEFAULT_TO(sum_sum_expr_10, 0:numeric), 'n_orders_96': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, 
aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) - PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + PROJECT(columns={'n_nations': n_nations, 'n_orders_94': DEFAULT_TO(sum_sum_expr_7, 0:numeric), 'n_orders_95': DEFAULT_TO(sum_sum_expr_10, 0:numeric), 'n_orders_96': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_suppliers': n_suppliers, 'r_name': r_name, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': 
t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_agg_29': sum_agg_29, 'sum_n_rows_1': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + PROJECT(columns={'agg_1': 1:numeric, 'agg_29': agg_29, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_e.txt b/tests/test_plan_refsols/common_prefix_e.txt index d39cb9f8f..ba7632a6d 100644 --- a/tests/test_plan_refsols/common_prefix_e.txt +++ b/tests/test_plan_refsols/common_prefix_e.txt @@ -2,8 +2,8 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index c71a26a59..dd8ca64e5 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -2,12 +2,12 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_sum_n_rows': SUM(sum_n_rows)}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) - PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 171bc4f3e..848a95bb5 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -2,12 +2,12 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 
'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(agg_2)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.agg_2, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - PROJECT(columns={'agg_2': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + PROJECT(columns={'agg_2': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 17e99a1c8..eb6de35e5 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ 
b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,21 +1,22 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_0, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_0': t0.agg_0, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - PROJECT(columns={'agg_0': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18_0': t0.expr_18_0, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - PROJECT(columns={'expr_18_0': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_nations': n_nations, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'r_name': r_name, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0_1, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_0': sum_agg_0, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + 
AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + 
SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index 0f7899049..22743c469 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', n_selected_orders)], orderings=[(n_rows):desc_last, (n_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_rows': n_rows, 'n_selected_orders': n_selected_orders}, orderings=[(n_rows):desc_last, (n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'n_selected_orders': t1.n_selected_orders}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'n_selected_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) + PROJECT(columns={'n_name': n_name, 'n_rows': n_rows, 'n_selected_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) diff --git 
a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 80566e1b0..65b095b33 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,9 +1,9 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', n_selected_suppliers), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', selected_suppliers_avg), ('selected_suppliers_sum', selected_suppliers_sum), ('nation_name', n_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': n_selected_suppliers, 'selected_suppliers_avg': selected_suppliers_avg, 'selected_suppliers_sum': selected_suppliers_sum}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_selected_suppliers': t1.n_selected_suppliers, 'selected_suppliers_avg': t1.selected_suppliers_avg, 'selected_suppliers_sum': t1.selected_suppliers_sum}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_selected_suppliers': t0.n_selected_suppliers, 'selected_suppliers_avg': t0.selected_suppliers_avg, 'selected_suppliers_sum': t0.selected_suppliers_sum}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': 
ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) + PROJECT(columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) @@ -15,5 +15,5 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', n_selected_supplie SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=CONTAINS(p_name, 'mint':string), 
columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 642465175..03557163c 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,8 +1,8 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - 
PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 
'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 9d56b7acd..4678a87a4 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,13 +1,12 @@ ROOT(columns=[('name', c_name), ('n_orders', n_orders), ('n_parts_ordered', n_parts_ordered), ('n_distinct_parts', n_distinct_parts)], orderings=[(ordering_3):asc_first, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_distinct_parts': n_distinct_parts, 'n_orders': n_orders, 'n_parts_ordered': n_parts_ordered, 'ordering_3': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_distinct_parts': t1.n_distinct_parts, 'n_orders': t0.n_orders, 'n_parts_ordered': t1.n_parts_ordered, 'ordering_3': t1.ordering_3}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_orders': t1.n_orders}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'o_custkey': o_custkey}) + PROJECT(columns={'c_name': c_name, 'n_distinct_parts': DEFAULT_TO(ndistinct_l_partkey, 0:numeric), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_parts_ordered': DEFAULT_TO(n_rows_1, 0:numeric), 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - PROJECT(columns={'n_distinct_parts': DEFAULT_TO(ndistinct_l_partkey, 0:numeric), 'n_parts_ordered': DEFAULT_TO(n_rows, 0:numeric), 'o_custkey': o_custkey, 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric)}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 7167a6bc8..355dc9ad3 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_anything_l_extendedprice), ('total_spent', total_spent)], orderings=[(total_spent):desc_last, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'total_spent': t1.total_spent}) 
- SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - PROJECT(columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_anything_l_extendedprice': MAX(anything_anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_o_totalprice': SUM(o_totalprice), 'sum_sum_n_rows': SUM(sum_n_rows)}) JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'anything_anything_l_extendedprice': t1.anything_anything_l_extendedprice, 'anything_p_name': t1.anything_p_name, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'sum_n_rows': t1.sum_n_rows}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index dec2085d2..9d8b97da5 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), 
('most_recent_order_date', o_orderdate), ('most_recent_order_total', most_recent_order_total), ('most_recent_order_distinct', most_recent_order_distinct)], orderings=[(c_name):asc_first]) - FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name_1, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate_1}) - PROJECT(columns={'c_name_1': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate_1': o_orderdate}) + FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate}) + PROJECT(columns={'c_name': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate': o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index 7b3cc0556..e350ba091 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,12 +1,12 @@ ROOT(columns=[('name', c_name), ('total_qty', total_qty)], orderings=[(total_qty):desc_last, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), 
columns={'c_name': c_name, 'total_qty': total_qty}, orderings=[(total_qty):desc_last, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'total_qty': t1.total_qty}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'o_custkey': o_custkey, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) + PROJECT(columns={'c_name': c_name, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_custkey': 
t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index dd060fac2..b76d2a813 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,12 +1,12 @@ ROOT(columns=[('name', c_name), ('total_qty', total_qty)], orderings=[(total_qty):desc_last, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'total_qty': total_qty}, orderings=[(total_qty):desc_last, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'total_qty': t1.total_qty}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'o_custkey': o_custkey, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) + PROJECT(columns={'c_name': c_name, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': 
c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index 30bac545e..9a5054bce 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', c_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_orders': n_orders}, orderings=[(n_orders):desc_last, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_orders': t1.n_orders}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'o_custkey': o_custkey}) + PROJECT(columns={'c_name': c_name, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) 
AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 7c453ab94..feffdfb1a 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,18 +1,17 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) + FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': 
t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - PROJECT(columns={'ps_suppkey': ps_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_1': t1.expr_1, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) 
SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index d31f6b6ff..6d7af3f9c 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,21 +1,20 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) + FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == 
t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - PROJECT(columns={'ps_suppkey': ps_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_1': t1.expr_1, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 
1:numeric, 0:numeric), 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index 51fe077d4..20bcf38c9 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ b/tests/test_plan_refsols/correl_17.txt @@ -1,6 +1,5 @@ ROOT(columns=[('fullname', fname)], orderings=[(fname):asc_first]) - PROJECT(columns={'fname': JOIN_STRINGS('-':string, LOWER(r_name), lname)}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'lname': t0.lname, 'r_name': t1.r_name}) - PROJECT(columns={'lname': LOWER(n_name), 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) + PROJECT(columns={'fname': JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name))}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 900ec9029..5f6ca684d 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,12 +1,11 @@ ROOT(columns=[('n', DEFAULT_TO(sum_n_above_avg, 0:numeric))], orderings=[]) AGGREGATE(keys={}, 
aggregations={'sum_n_above_avg': SUM(n_above_avg)}) AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_above_avg': COUNT()}) - FILTER(condition=o_totalprice >= 0.5:numeric * total_price_sum, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'total_price_sum': t0.total_price_sum}) - PROJECT(columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'total_price_sum': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) - AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) + FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) + AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, 
columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index a480ec3e3..0887475a3 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,14 +1,15 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, 
type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey}) + FILTER(condition=n_name == n_name, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index 1217865d6..d179a88b7 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,8 +1,8 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orders_in_range)], orderings=[(year_7):asc_first, (month_6):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'month_6': ANYTHING(month), 'n_orders_in_range': COUNT(), 'year_7': ANYTHING(year)}) FILTER(condition=MONOTONIC(prev_month_avg_price, o_totalprice, avg_o_totalprice) | MONOTONIC(avg_o_totalprice, o_totalprice, prev_month_avg_price), columns={'month': month, 'year': year}) - JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': 
t0.year}) - PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) + JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice_1, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) + PROJECT(columns={'avg_o_totalprice_1': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index ba23e8e7a..e7c7cde56 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -2,8 +2,8 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_ AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': 
t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_selected_purchases': 1:numeric, 'o_orderkey': o_orderkey}) + PROJECT(columns={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_selected_purchases': 1:numeric}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) @@ -13,8 +13,8 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': 
t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 1d43f55ec..ac9583af3 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -3,8 +3,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_0': t0.agg_0, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - PROJECT(columns={'agg_0': 1:numeric, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) + PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -13,8 +13,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 8c226d189..3f6839aaf 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -3,16 +3,16 @@ ROOT(columns=[('nation_name', anything_anything_n_name), 
('n_selected_purchases' AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_0': t0.agg_0, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - PROJECT(columns={'agg_0': 1:numeric, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) + PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index f290101f4..29bf6bb22 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_above_avg_customers), ('n_above_avg_suppliers', n_above_avg_suppliers), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_above_avg_customers': t0.n_above_avg_customers, 'n_above_avg_suppliers': t1.n_above_avg_suppliers}) - JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_above_avg_customers': t0.n_above_avg_customers}) - PROJECT(columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_above_avg_customers': DEFAULT_TO(n_rows, 0:numeric)}) + PROJECT(columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'max_c_acctbal': max_c_acctbal, 'min_c_acctbal': min_c_acctbal, 'n_above_avg_customers': DEFAULT_TO(n_rows, 0:numeric), 'n_above_avg_suppliers': DEFAULT_TO(n_rows_1, 0:numeric)}) + JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) @@ -14,9 +14,8 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_n AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - PROJECT(columns={'n_above_avg_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 46f53cd79..9f4248442 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,20 +1,20 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, 
aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(region_name), 'n_rows': COUNT()}) - FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': region_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'region_name': t0.region_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'region_name': t1.region_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - PROJECT(columns={'r_regionkey': r_regionkey, 'region_name': LOWER(r_name)}) + PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': LOWER(r_name)}) + FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) + JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': 
t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index 66be22e09..c868fedf4 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,17 +1,17 @@ ROOT(columns=[('nation_name', nation_name), ('mean_rev', mean_rev), ('median_rev', median_rev)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(revenue), 'median_rev': MEDIAN(revenue), 'nation_name': ANYTHING(n_name)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'revenue': t0.revenue}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'revenue': t1.revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == 
'1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - PROJECT(columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount}) + PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': 
o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index e1497fe3a..21bb4c0e6 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -6,16 +6,15 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': year}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_0': t0.agg_0, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': 
t0.n_name, 'sum_l_extendedprice': t1.sum_l_extendedprice, 'year': t0.year}) - PROJECT(columns={'agg_0': 1:numeric, 'n_name': n_name, 'o_orderkey': o_orderkey, 'year': year}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'year': t1.year}) + PROJECT(columns={'agg_0': 1:numeric, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'sum_l_extendedprice': sum_l_extendedprice, 'year': YEAR(o_orderdate)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - PROJECT(columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, 
aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/cumulative_stock_analysis.txt b/tests/test_plan_refsols/cumulative_stock_analysis.txt index 3d1c1865b..f0c0d24b5 100644 --- a/tests/test_plan_refsols/cumulative_stock_analysis.txt +++ b/tests/test_plan_refsols/cumulative_stock_analysis.txt @@ -1,7 +1,7 @@ ROOT(columns=[('date_time', sbTxDateTime), ('txn_within_day', txn_within_day), ('n_buys_within_day', n_buys_within_day), ('pct_apple_txns', pct_apple_txns), ('share_change', share_change), ('rolling_avg_amount', rolling_avg_amount)], orderings=[(sbTxDateTime):asc_first]) PROJECT(columns={'n_buys_within_day': RELCOUNT(args=[KEEP_IF(sbTxType, sbTxType == 'buy':string)], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True), 'pct_apple_txns': ROUND(100.0:numeric * RELSUM(args=[ISIN(sbTickerSymbol, ['AAPL', 'AMZN']:array[unknown])], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric), 'rolling_avg_amount': ROUND(RELAVG(args=[sbTxAmount], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric), 'sbTxDateTime': sbTxDateTime, 'share_change': RELSUM(args=[IFF(sbTxType == 'buy':string, sbTxShares, 0:numeric - sbTxShares)], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 'txn_within_day': RELSIZE(args=[], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType, 'txn_day': t0.txn_day}) - PROJECT(columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) + PROJECT(columns={'sbTickerSymbol': sbTickerSymbol, 'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxType': sbTxType, 'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType}) FILTER(condition=MONTH(sbTxDateTime) == 4:numeric & YEAR(sbTxDateTime) == 2023:numeric & sbTxStatus == 'success':string, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxStatus': sbTxStatus, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index d57301541..1fa05bd28 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', c_name), ('largest_diff', largest_diff)], orderings=[(largest_diff):desc_last]) LIMIT(limit=Literal(value=5, type=NumericType()), 
columns={'c_name': c_name, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'largest_diff': t1.largest_diff}) - FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) - PROJECT(columns={'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff), 'o_custkey': o_custkey}) + PROJECT(columns={'c_name': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) + FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_diff': MAX(revenue_delta), 'min_diff': MIN(revenue_delta)}) PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index 7412f2b33..52700565c 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', c_name), ('total_recent_value', total_recent_value)], 
orderings=[(total_recent_value):desc_last]) LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'total_recent_value': t1.total_recent_value}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - PROJECT(columns={'o_custkey': o_custkey, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + PROJECT(columns={'c_name': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/dumb_aggregation.txt b/tests/test_plan_refsols/dumb_aggregation.txt index b72dc0295..b178db074 100644 --- a/tests/test_plan_refsols/dumb_aggregation.txt +++ b/tests/test_plan_refsols/dumb_aggregation.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_name), ('a1', r_name), ('a2', r_name), ('a3', a3), ('a4', a4), ('a5', a5), ('a6', r_regionkey), ('a7', r_name), ('a8', r_regionkey)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'a3': t1.a3, 'a4': t1.a4, 'a5': t1.a5, 'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': 
t1.r_regionkey}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) - PROJECT(columns={'a3': DEFAULT_TO(r_regionkey, 0:numeric), 'a4': IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric), 'a5': 1:numeric, 'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'a3': DEFAULT_TO(r_regionkey, 0:numeric), 'a4': IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric), 'a5': 1:numeric, 'n_name': n_name, 'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) + LIMIT(limit=Literal(value=2, type=NumericType()), columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index be5415490..a7a2bb160 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,15 +1,15 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', event_year), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first]) LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': event_year, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 
'event_year': t0.event_year, 's_name': t0.s_name, 't_name': t1.t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 'event_year': t0.event_year, 's_name': t1.s_name}) - PROJECT(columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'event_year': YEAR(ev_dt)}) + PROJECT(columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': YEAR(ev_dt), 's_name': s_name, 't_name': t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=SEASONS, 
columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/global_calc_backref.txt b/tests/test_plan_refsols/global_calc_backref.txt index 4d4e194ef..6b7d52375 100644 --- a/tests/test_plan_refsols/global_calc_backref.txt +++ b/tests/test_plan_refsols/global_calc_backref.txt @@ -1,5 +1,2 @@ -ROOT(columns=[('part_name', p_name), ('is_above_cutoff', p_retailprice > a), ('is_nickel', CONTAINS(p_type, b))], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'a': t0.a, 'b': t0.b, 'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) - PROJECT(columns={'a': 28.15:numeric, 'b': 'NICKEL':string}) - EMPTYSINGLETON() - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) +ROOT(columns=[('part_name', p_name), ('is_above_cutoff', p_retailprice > 28.15:numeric), ('is_nickel', CONTAINS(p_type, 'NICKEL':string))], orderings=[]) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/hour_minute_day.txt 
b/tests/test_plan_refsols/hour_minute_day.txt index bfacee546..0ad3b9efe 100644 --- a/tests/test_plan_refsols/hour_minute_day.txt +++ b/tests/test_plan_refsols/hour_minute_day.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId), ('_expr0', _expr0), ('_expr1', _expr1), ('_expr2', _expr2)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, columns={'_expr0': t0._expr0, '_expr1': t0._expr1, '_expr2': t0._expr2, 'sbTxId': t0.sbTxId}) - PROJECT(columns={'_expr0': HOUR(sbTxDateTime), '_expr1': MINUTE(sbTxDateTime), '_expr2': SECOND(sbTxDateTime), 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) + PROJECT(columns={'_expr0': HOUR(sbTxDateTime), '_expr1': MINUTE(sbTxDateTime), '_expr2': SECOND(sbTxDateTime), 'sbTxId': sbTxId}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) - FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) + FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt index cd92282e3..c9f1900fe 100644 --- a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt +++ b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('ship_year', ship_year), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', value)], 
orderings=[]) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ship_year': t0.ship_year, 'supplier_nation': t0.n_name, 'value': t0.value}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name, 'ship_year': t0.ship_year, 'value': t0.value}) - PROJECT(columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'ship_year': YEAR(l_shipdate), 'value': l_extendedprice * 1.0:numeric - l_discount}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) +ROOT(columns=[('ship_year', YEAR(l_shipdate)), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', l_extendedprice * 1.0:numeric - l_discount)], orderings=[]) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt index 6d97a53f1..e06f975c8 100644 --- a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt +++ b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('order_year', order_year), ('customer_region_name', r_name), ('customer_nation_name', n_name), ('supplier_region_name', supplier_region_name), ('nation_name', nation_name)], orderings=[]) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'order_year': t0.order_year, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'order_year': t0.order_year, 'r_name': t0.r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'order_year': t1.order_year, 'r_name': t0.r_name}) +ROOT(columns=[('order_year', YEAR(o_orderdate)), ('customer_region_name', r_name), ('customer_nation_name', n_name), ('supplier_region_name', supplier_region_name), ('nation_name', nation_name)], orderings=[]) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 
'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - PROJECT(columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'order_year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index a0098dd14..d438e6e1e 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ 
b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,6 +1,6 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) - FILTER(condition=month_total_spent > NEXT(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & month_total_spent > PREV(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'month_total_spent': t1.month_total_spent, 'year': t1.year}) + FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) + JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) @@ -9,8 +9,7 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (m PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': 
o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - PROJECT(columns={'month': month, 'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index 21b348784..d185fd519 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'suppliers_in_black': t0.suppliers_in_black, 'total_suppliers': t1.total_suppliers}) - PROJECT(columns={'n_name': 
n_name, 'n_nationkey': n_nationkey, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric)}) + PROJECT(columns={'n_name': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index b21fc51af..bca9f137e 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,8 +1,8 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', n_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, 
cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - PROJECT(columns={'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxType': sbTxType}) + PROJECT(columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) @@ -10,12 +10,12 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, 
columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 
'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index 5f21161b4..6227e214c 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,10 +1,11 @@ -ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', total_consumer_value_a), ('total_supplier_value', total_supplier_value_a), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'n_nationkey': t0.n_nationkey, 'total_consumer_value_a': t0.total_consumer_value_a, 'total_supplier_value_a': t1.total_supplier_value_a}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'n_nationkey': t0.n_nationkey, 'total_consumer_value_a': t1.total_consumer_value_a}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - PROJECT(columns={'avg_c_acctbal': avg_c_acctbal, 'c_nationkey': c_nationkey, 'max_c_acctbal': max_c_acctbal, 'total_consumer_value_a': DEFAULT_TO(sum_c_acctbal, 0:numeric)}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) - 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal': max_s_acctbal, 's_nationkey': s_nationkey, 'total_supplier_value_a': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) +ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal_1, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + PROJECT(columns={'avg_c_acctbal_1': avg_c_acctbal, 'max_c_acctbal_1': max_c_acctbal, 'n_nationkey': n_nationkey, 'sum_c_acctbal': sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + PROJECT(columns={'avg_c_acctbal_1': avg_c_acctbal, 'c_nationkey': c_nationkey, 'max_c_acctbal_1': max_c_acctbal, 'sum_c_acctbal': sum_c_acctbal}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 's_nationkey': s_nationkey, 'sum_s_acctbal': sum_s_acctbal}) AGGREGATE(keys={'s_nationkey': 
s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt index 5209b11c8..2e2b636f1 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt @@ -1,10 +1,8 @@ -ROOT(columns=[('nation_name', n_nationkey), ('consumer_value', consumer_value), ('producer_value', producer_value)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'consumer_value': t0.consumer_value, 'n_nationkey': t0.n_nationkey, 'producer_value': t1.producer_value}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'consumer_value': t1.consumer_value, 'n_nationkey': t0.n_nationkey}) +ROOT(columns=[('nation_name', n_nationkey), ('consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('producer_value', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - PROJECT(columns={'c_nationkey': c_nationkey, 'consumer_value': DEFAULT_TO(sum_c_acctbal, 0:numeric)}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - 
PROJECT(columns={'producer_value': DEFAULT_TO(sum_s_acctbal, 0:numeric), 's_nationkey': s_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_s_acctbal': SUM(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_s_acctbal': SUM(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index 738a980a1..caee91153 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -4,8 +4,8 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name_1, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) - PROJECT(columns={'c_name_1': c_name, 'c_nationkey': c_nationkey, 'o_orderkey_1': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 
'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) + PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt index 12ee5e43d..7706c56b5 100644 --- a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt +++ b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'suppliers_in_black': t0.suppliers_in_black, 'total_suppliers': t1.total_suppliers}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': 
s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) +ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/orders_sum_line_price.txt b/tests/test_plan_refsols/orders_sum_line_price.txt index 42819ba5f..c3c7ca09c 100644 --- a/tests/test_plan_refsols/orders_sum_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_line_price.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('okey', o_orderkey), ('lsum', lsum)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'lsum': t1.lsum, 'o_orderkey': t0.o_orderkey}) +ROOT(columns=[('okey', o_orderkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': 
t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - PROJECT(columns={'l_orderkey': l_orderkey, 'lsum': DEFAULT_TO(sum_l_extendedprice, 0:numeric)}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt index 766ea2613..139b7720a 100644 --- a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('okey', o_orderkey), ('lavg', lavg)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'lavg': t1.lavg, 'o_orderkey': t0.o_orderkey}) +ROOT(columns=[('okey', o_orderkey), ('lavg', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice)], orderings=[]) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - PROJECT(columns={'l_orderkey': l_orderkey, 'lavg': DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, 
aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index 5f411333a..3b22ff6b8 100644 --- a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt @@ -1,8 +1,8 @@ ROOT(columns=[('reduced_size', reduced_size), ('retail_price_int', retail_price_int), ('message', message), ('discount', l_discount), ('date_dmy', date_dmy), ('date_md', date_md), ('am_pm', am_pm)], orderings=[(l_discount):desc_last]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'am_pm': am_pm, 'date_dmy': date_dmy, 'date_md': date_md, 'l_discount': l_discount, 'message': message, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}, orderings=[(l_discount):desc_last]) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'am_pm': t1.am_pm, 'date_dmy': t1.date_dmy, 'date_md': t1.date_md, 'l_discount': t1.l_discount, 'message': t0.message, 'reduced_size': t0.reduced_size, 'retail_price_int': t0.retail_price_int}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'message': message, 'p_partkey': p_partkey, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) - PROJECT(columns={'message': JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size)), 'p_partkey': p_partkey, 'reduced_size': FLOAT(p_size / 2.5:numeric), 'retail_price_int': INTEGER(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) - PROJECT(columns={'am_pm': STRING(l_receiptdate, '%H:%M%p':string), 'date_dmy': STRING(l_receiptdate, '%d-%m-%Y':string), 'date_md': STRING(l_receiptdate, '%m/%d':string), 'l_discount': l_discount, 
'l_partkey': l_partkey}) + PROJECT(columns={'am_pm': STRING(l_receiptdate, '%H:%M%p':string), 'date_dmy': STRING(l_receiptdate, '%d-%m-%Y':string), 'date_md': STRING(l_receiptdate, '%m/%d':string), 'l_discount': l_discount, 'message': message, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'message': t0.message, 'reduced_size': t0.reduced_size, 'retail_price_int': t0.retail_price_int}) + LIMIT(limit=Literal(value=2, type=NumericType()), columns={'message': message, 'p_partkey': p_partkey, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) + PROJECT(columns={'message': JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size)), 'p_partkey': p_partkey, 'reduced_size': FLOAT(p_size / 2.5:numeric), 'retail_price_int': INTEGER(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/region_acctbal_breakdown.txt b/tests/test_plan_refsols/region_acctbal_breakdown.txt index 0cee405f0..1dd3998d9 100644 --- a/tests/test_plan_refsols/region_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/region_acctbal_breakdown.txt @@ -2,7 +2,7 @@ ROOT(columns=[('region_name', r_name), ('n_red_acctbal', n_red_acctbal), ('n_bla JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': 
r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'median_black_acctbal': MEDIAN(non_negative_acctbal), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(negative_acctbal), 'n_black_acctbal': COUNT(non_negative_acctbal), 'n_red_acctbal': COUNT(negative_acctbal)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey, 'negative_acctbal': t1.negative_acctbal, 'non_negative_acctbal': t1.non_negative_acctbal}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey, 'negative_acctbal': KEEP_IF(c_acctbal, c_acctbal < 0:numeric), 'non_negative_acctbal': KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)}) + PROJECT(columns={'c_acctbal': c_acctbal, 'n_regionkey': n_regionkey, 'negative_acctbal': KEEP_IF(c_acctbal, c_acctbal < 0:numeric), 'non_negative_acctbal': KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 4a5f82679..223d5aab8 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -5,12 +5,14 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': 
n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'quantity': t0.quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'quantity': t1.quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_rows': n_rows, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows_1, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) + PROJECT(columns={'n_rows_1': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows_1, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + 
PROJECT(columns={'n_rows_1': n_rows, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_rows_1': n_rows, 'sum_l_quantity': sum_l_quantity}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index fa891ae1e..acb036880 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -2,15 +2,15 @@ ROOT(columns=[('year', release_year), ('ir', ir)], orderings=[(release_year):asc PROJECT(columns={'ir': ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric), 'release_year': release_year}) JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) AGGREGATE(keys={'release_year': release_year}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'release_year': t1.release_year}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) - PROJECT(columns={'pr_id': pr_id, 'release_year': YEAR(pr_release)}) + PROJECT(columns={'n_rows': n_rows, 'release_year': YEAR(pr_release)}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'release_year': t0.release_year}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'release_year': t1.release_year}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - PROJECT(columns={'pr_id': pr_id, 'release_year': YEAR(pr_release)}) + PROJECT(columns={'release_year': YEAR(pr_release)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt 
b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index ab43e7a9a..6a96f0f3d 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,29 +1,29 @@ ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) PROJECT(columns={'ir': ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric), 'month': month, 'month_0': JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': year}) + PROJECT(columns={'expr_3': expr_3, 'month': MONTH(ca_dt), 'n_rows': n_rows, 'year': year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, 
cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': 
de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index 931f72ef9..df4147ff6 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ 
b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,8 +1,8 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ir)], orderings=[(ir):desc_last]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}, orderings=[(ir):desc_last]) - JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ir': t1.ir, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type}) - SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) - PROJECT(columns={'de_product_id': de_product_id, 'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)}) + PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}) + JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) + SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) PROJECT(columns={'de_product_id': de_product_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 3bbbef4f1..191147ea3 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ 
b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -7,23 +7,23 @@ ROOT(columns=[('years_since_release', years_since_release), ('cum_ir', cum_ir), FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_4': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + PROJECT(columns={'expr_4': expr_4, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + 
FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, 
columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index e333d030c..f4222569e 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -4,15 +4,15 @@ ROOT(columns=[('yr', year), ('cum_ir', cum_ir), ('pct_bought_change', pct_bought FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + PROJECT(columns={'expr_3': expr_3, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) 
AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index 2dbde582e..f92f003ec 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,14 +1,14 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', REVENUE), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'REVENUE': t0.REVENUE, 'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': 
t1.n_name}) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'expr_1': t1.expr_1, 'o_custkey': t0.o_custkey}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == 
t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 575bf0832..e81f6331d 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,22 +1,21 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}, orderings=[(VALUE):desc_last]) - FILTER(condition=VALUE > min_market_share, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'VALUE': t1.VALUE, 'min_market_share': t0.min_market_share, 'ps_partkey': t1.ps_partkey}) - PROJECT(columns={'min_market_share': DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric}) + FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) + PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey, 'sum_metric': sum_metric}) + JOIN(condition=True:bool, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'metric': t0.metric}) - PROJECT(columns={'metric': ps_supplycost * ps_availqty, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'metric': ps_supplycost * ps_availqty}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'expr_2': t0.expr_2, 'ps_partkey': t0.ps_partkey}) - PROJECT(columns={'expr_2': ps_supplycost * ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'expr_2': ps_supplycost * 
ps_availqty, 'ps_partkey': ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q12.txt b/tests/test_plan_refsols/tpch_q12.txt index 821150be7..6a11fe1ab 100644 --- a/tests/test_plan_refsols/tpch_q12.txt +++ b/tests/test_plan_refsols/tpch_q12.txt @@ -1,8 +1,8 @@ ROOT(columns=[('L_SHIPMODE', l_shipmode), ('HIGH_LINE_COUNT', HIGH_LINE_COUNT), ('LOW_LINE_COUNT', LOW_LINE_COUNT)], orderings=[(l_shipmode):asc_first]) PROJECT(columns={'HIGH_LINE_COUNT': DEFAULT_TO(sum_is_high_priority, 0:numeric), 'LOW_LINE_COUNT': DEFAULT_TO(sum_expr_2, 0:numeric), 'l_shipmode': l_shipmode}) AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(expr_2), 'sum_is_high_priority': SUM(is_high_priority)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_2': t1.expr_2, 'is_high_priority': t1.is_high_priority, 'l_shipmode': 
t0.l_shipmode}) - FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) - PROJECT(columns={'expr_2': NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown])), 'is_high_priority': ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]), 'o_orderkey': o_orderkey}) + PROJECT(columns={'expr_2': NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown])), 'is_high_priority': ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]), 'l_shipmode': l_shipmode}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) + FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index 4d4ff236b..877cbcd22 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -1,17 +1,18 @@ ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', TOTAL_REVENUE)], orderings=[(s_suppkey):asc_first]) - JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & 
t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'TOTAL_REVENUE': t1.TOTAL_REVENUE, 's_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'max_revenue': MAX(total_revenue)}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'total_revenue': t1.total_revenue}) - SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) - PROJECT(columns={'l_suppkey': l_suppkey, 'total_revenue': DEFAULT_TO(sum_expr_2, 0:numeric)}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - PROJECT(columns={'expr_2': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) - PROJECT(columns={'TOTAL_REVENUE': DEFAULT_TO(sum_expr_3, 0:numeric), 'l_suppkey': l_suppkey, 'sum_expr_3': sum_expr_3}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(expr_3)}) - PROJECT(columns={'expr_3': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, 
columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'TOTAL_REVENUE': DEFAULT_TO(sum_expr_3, 0:numeric), 's_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) + JOIN(condition=DEFAULT_TO(t1.sum_expr_3_1, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'max_revenue': MAX(total_revenue)}) + PROJECT(columns={'total_revenue': DEFAULT_TO(sum_expr_2, 0:numeric)}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) + SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(expr_2)}) + PROJECT(columns={'expr_2': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) + PROJECT(columns={'l_suppkey': l_suppkey, 'sum_expr_3': sum_expr_3, 'sum_expr_3_1': sum_expr_3}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(expr_3)}) + PROJECT(columns={'expr_3': l_extendedprice 
* 1:numeric - l_discount, 'l_suppkey': l_suppkey}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index fc34417b8..8de1daa53 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,10 +1,10 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'TOTAL_QUANTITY': t1.TOTAL_QUANTITY, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': 
TOTAL_QUANTITY, 'l_orderkey': l_orderkey}) - PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'l_orderkey': l_orderkey}) + FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index f025d4504..81644d8d2 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -7,13 +7,12 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'part_qty': t1.part_qty, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(DEFAULT_TO(sum_l_quantity, 0:numeric), 0:numeric), columns={'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'part_qty': t1.part_qty}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - PROJECT(columns={'l_partkey': l_partkey, 'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) 
diff --git a/tests/test_plan_refsols/tpch_q3.txt b/tests/test_plan_refsols/tpch_q3.txt index e80501af0..61ed28ec5 100644 --- a/tests/test_plan_refsols/tpch_q3.txt +++ b/tests/test_plan_refsols/tpch_q3.txt @@ -2,12 +2,12 @@ ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', REVENUE), ('O_ORDERDATE', LIMIT(limit=Literal(value=10, type=NumericType()), columns={'REVENUE': REVENUE, 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, orderings=[(REVENUE):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first]) PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(expr_1)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'expr_1': t1.expr_1, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) - FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey, 'o_orderdate': 
o_orderdate, 'o_shippriority': o_shippriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) + FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) FILTER(condition=l_shipdate > datetime.date(1995, 3, 15):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index a6f56996d..59ff61eef 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,19 +1,19 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', REVENUE)], orderings=[(REVENUE):desc_last]) PROJECT(columns={'REVENUE': DEFAULT_TO(sum_value, 0:numeric), 'anything_n_name': anything_n_name}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'value': t0.value}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'value': t1.value}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - PROJECT(columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'value': l_extendedprice * 1:numeric - l_discount}) + PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + 
SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/various_aggfuncs_simple.txt b/tests/test_plan_refsols/various_aggfuncs_simple.txt index 0d535541a..b4f612231 100644 --- a/tests/test_plan_refsols/various_aggfuncs_simple.txt +++ b/tests/test_plan_refsols/various_aggfuncs_simple.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', avg_bal), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_bal': t1.avg_bal, 'count_c_acctbal': t1.count_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) +ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', DEFAULT_TO(avg_c_acctbal, 0:numeric)), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'avg_bal': DEFAULT_TO(avg_c_acctbal, 0:numeric), 'c_nationkey': c_nationkey, 'count_c_acctbal': count_c_acctbal, 'max_c_acctbal': max_c_acctbal, 'min_c_acctbal': min_c_acctbal, 'n_rows': n_rows, 'sum_c_acctbal': sum_c_acctbal}) + PROJECT(columns={'avg_c_acctbal': avg_c_acctbal, 'c_nationkey': c_nationkey, 'count_c_acctbal_1': 
count_c_acctbal, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows_1': n_rows, 'sum_c_acctbal_1': sum_c_acctbal}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal': COUNT(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index c436e164f..28100ecde 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,9 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice_1 < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - PROJECT(columns={'o_totalprice_1': o_totalprice}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) - FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) + FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 
'o_totalprice': o_totalprice}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/year_month_nation_orders.txt b/tests/test_plan_refsols/year_month_nation_orders.txt index 545218244..99d446335 100644 --- a/tests/test_plan_refsols/year_month_nation_orders.txt +++ b/tests/test_plan_refsols/year_month_nation_orders.txt @@ -1,13 +1,13 @@ ROOT(columns=[('nation_name', n_name), ('order_year', order_year), ('order_month', order_month), ('n_orders', n_orders)], orderings=[(n_orders):desc_last]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_orders': n_orders, 'order_month': order_month, 'order_year': order_year}, orderings=[(n_orders):desc_last]) AGGREGATE(keys={'n_name': n_name, 'order_month': order_month, 'order_year': order_year}, aggregations={'n_orders': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'order_month': t1.order_month, 'order_year': t1.order_year}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) - FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - PROJECT(columns={'o_custkey': o_custkey, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}) + PROJECT(columns={'n_name': n_name, 'order_month': 
MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) + FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_sql_refsols/defog_broker_adv16_ansi.sql b/tests/test_sql_refsols/defog_broker_adv16_ansi.sql index dcfc73604..383398ff4 100644 --- a/tests/test_sql_refsols/defog_broker_adv16_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv16_ansi.sql @@ -1,10 +1,7 @@ WITH _s1 AS ( SELECT - ( - 100.0 * ( - COALESCE(SUM(sbtxamount), 0) - COALESCE(SUM(sbtxtax + sbtxcommission), 0) - ) - ) / COALESCE(SUM(sbtxamount), 0) AS spm, + SUM(sbtxtax + sbtxcommission) AS sum_expr_2, + SUM(sbtxamount) AS sum_sbtxamount, sbtxtickerid FROM main.sbtransaction WHERE @@ -14,7 +11,11 @@ WITH _s1 AS ( ) SELECT sbticker.sbtickersymbol AS symbol, - _s1.spm AS SPM + ( + 100.0 * ( + COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_expr_2, 0) + ) + ) / COALESCE(_s1.sum_sbtxamount, 0) AS SPM FROM main.sbticker AS sbticker JOIN _s1 AS _s1 ON _s1.sbtxtickerid = 
sbticker.sbtickerid diff --git a/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql index e57a0b702..0e6847d55 100644 --- a/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql @@ -1,10 +1,7 @@ WITH _s1 AS ( SELECT - CAST(( - 100.0 * ( - COALESCE(SUM(sbtxamount), 0) - COALESCE(SUM(sbtxtax + sbtxcommission), 0) - ) - ) AS REAL) / COALESCE(SUM(sbtxamount), 0) AS spm, + SUM(sbtxtax + sbtxcommission) AS sum_expr_2, + SUM(sbtxamount) AS sum_sbtxamount, sbtxtickerid FROM main.sbtransaction WHERE @@ -14,7 +11,11 @@ WITH _s1 AS ( ) SELECT sbticker.sbtickersymbol AS symbol, - _s1.spm AS SPM + CAST(( + 100.0 * ( + COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_expr_2, 0) + ) + ) AS REAL) / COALESCE(_s1.sum_sbtxamount, 0) AS SPM FROM main.sbticker AS sbticker JOIN _s1 AS _s1 ON _s1.sbtxtickerid = sbticker.sbtickerid diff --git a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql index 6c4265832..9a9471dea 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql @@ -1,7 +1,7 @@ WITH _s1 AS ( SELECT - COALESCE(SUM(sale_price), 0) AS total_revenue, - COUNT(*) AS n_rows, + COUNT(*) AS n_rows_1, + SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales WHERE @@ -12,11 +12,11 @@ WITH _s1 AS ( SELECT salespersons.first_name, salespersons.last_name, - _s1.n_rows AS total_sales, - _s1.total_revenue + _s1.n_rows_1 AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - _s1.n_rows DESC + _s1.n_rows_1 DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql index 2856c1f24..9f797c2bc 100644 --- 
a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql @@ -1,7 +1,7 @@ WITH _s1 AS ( SELECT - COALESCE(SUM(sale_price), 0) AS total_revenue, - COUNT(*) AS n_rows, + COUNT(*) AS n_rows_1, + SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales WHERE @@ -14,11 +14,11 @@ WITH _s1 AS ( SELECT salespersons.first_name, salespersons.last_name, - _s1.n_rows AS total_sales, - _s1.total_revenue + _s1.n_rows_1 AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - _s1.n_rows DESC + _s1.n_rows_1 DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql index 2cfb71c59..af2c3f84f 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql @@ -1,11 +1,8 @@ WITH _s1 AS ( SELECT - COALESCE( - SUM( - DATEDIFF(CAST(session_end_ts AS DATETIME), CAST(session_start_ts AS DATETIME), SECOND) - ), - 0 - ) AS total_duration, + SUM( + DATEDIFF(CAST(session_end_ts AS DATETIME), CAST(session_start_ts AS DATETIME), SECOND) + ) AS sum_duration, user_id FROM main.user_sessions WHERE @@ -15,7 +12,7 @@ WITH _s1 AS ( ) SELECT users.uid, - _s1.total_duration + COALESCE(_s1.sum_duration, 0) AS total_duration FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql index 793b9b9b3..93fddddfd 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql @@ -1,17 +1,14 @@ WITH _s1 AS ( SELECT - COALESCE( - SUM( + SUM( + ( ( - ( - CAST(( - JULIANDAY(DATE(session_end_ts, 'start of day')) - JULIANDAY(DATE(session_start_ts, 'start of day')) - ) AS INTEGER) * 24 + 
CAST(STRFTIME('%H', session_end_ts) AS INTEGER) - CAST(STRFTIME('%H', session_start_ts) AS INTEGER) - ) * 60 + CAST(STRFTIME('%M', session_end_ts) AS INTEGER) - CAST(STRFTIME('%M', session_start_ts) AS INTEGER) - ) * 60 + CAST(STRFTIME('%S', session_end_ts) AS INTEGER) - CAST(STRFTIME('%S', session_start_ts) AS INTEGER) - ), - 0 - ) AS total_duration, + CAST(( + JULIANDAY(DATE(session_end_ts, 'start of day')) - JULIANDAY(DATE(session_start_ts, 'start of day')) + ) AS INTEGER) * 24 + CAST(STRFTIME('%H', session_end_ts) AS INTEGER) - CAST(STRFTIME('%H', session_start_ts) AS INTEGER) + ) * 60 + CAST(STRFTIME('%M', session_end_ts) AS INTEGER) - CAST(STRFTIME('%M', session_start_ts) AS INTEGER) + ) * 60 + CAST(STRFTIME('%S', session_end_ts) AS INTEGER) - CAST(STRFTIME('%S', session_start_ts) AS INTEGER) + ) AS sum_duration, user_id FROM main.user_sessions WHERE @@ -21,7 +18,7 @@ WITH _s1 AS ( ) SELECT users.uid, - _s1.total_duration + COALESCE(_s1.sum_duration, 0) AS total_duration FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql index bd2b54392..7a8882408 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql @@ -5,7 +5,7 @@ WITH _s0 AS ( FROM main.devices GROUP BY de_product_id -), _t4 AS ( +), _s1 AS ( SELECT pr_id, pr_release @@ -13,23 +13,23 @@ WITH _s0 AS ( ), _s6 AS ( SELECT SUM(_s0.n_rows) AS sum_n_rows, - EXTRACT(YEAR FROM CAST(_t4.pr_release AS DATETIME)) AS release_year + EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) AS release_year FROM _s0 AS _s0 - JOIN _t4 AS _t4 - ON _s0.de_product_id = _t4.pr_id + JOIN _s1 AS _s1 + ON _s0.de_product_id = _s1.pr_id GROUP BY - EXTRACT(YEAR FROM CAST(_t4.pr_release AS DATETIME)) + EXTRACT(YEAR FROM CAST(_s1.pr_release AS 
DATETIME)) ), _s7 AS ( SELECT COUNT(*) AS n_rows, - EXTRACT(YEAR FROM CAST(_t6.pr_release AS DATETIME)) AS release_year + EXTRACT(YEAR FROM CAST(_s3.pr_release AS DATETIME)) AS release_year FROM main.devices AS devices - JOIN _t4 AS _t6 - ON _t6.pr_id = devices.de_product_id + JOIN _s1 AS _s3 + ON _s3.pr_id = devices.de_product_id JOIN main.incidents AS incidents ON devices.de_id = incidents.in_device_id GROUP BY - EXTRACT(YEAR FROM CAST(_t6.pr_release AS DATETIME)) + EXTRACT(YEAR FROM CAST(_s3.pr_release AS DATETIME)) ) SELECT _s6.release_year AS year, diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql index 0f6777f90..5ef83cf6a 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql @@ -5,7 +5,7 @@ WITH _s0 AS ( FROM main.devices GROUP BY de_product_id -), _t4 AS ( +), _s1 AS ( SELECT pr_id, pr_release @@ -13,23 +13,23 @@ WITH _s0 AS ( ), _s6 AS ( SELECT SUM(_s0.n_rows) AS sum_n_rows, - CAST(STRFTIME('%Y', _t4.pr_release) AS INTEGER) AS release_year + CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) AS release_year FROM _s0 AS _s0 - JOIN _t4 AS _t4 - ON _s0.de_product_id = _t4.pr_id + JOIN _s1 AS _s1 + ON _s0.de_product_id = _s1.pr_id GROUP BY - CAST(STRFTIME('%Y', _t4.pr_release) AS INTEGER) + CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) ), _s7 AS ( SELECT COUNT(*) AS n_rows, - CAST(STRFTIME('%Y', _t6.pr_release) AS INTEGER) AS release_year + CAST(STRFTIME('%Y', _s3.pr_release) AS INTEGER) AS release_year FROM main.devices AS devices - JOIN _t4 AS _t6 - ON _t6.pr_id = devices.de_product_id + JOIN _s1 AS _s3 + ON _s3.pr_id = devices.de_product_id JOIN main.incidents AS incidents ON devices.de_id = incidents.in_device_id GROUP BY - CAST(STRFTIME('%Y', _t6.pr_release) AS INTEGER) + CAST(STRFTIME('%Y', _s3.pr_release) AS INTEGER) ) 
SELECT _s6.release_year AS year, diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql index 4bf563b04..d4bcd9d17 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql @@ -7,7 +7,8 @@ WITH _s3 AS ( in_device_id ), _s5 AS ( SELECT - ROUND(COALESCE(SUM(COALESCE(_s3.n_rows, 0)), 0) / COUNT(*), 2) AS ir, + COUNT(*) AS n_rows, + SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, devices.de_product_id FROM main.devices AS devices JOIN main.products AS products @@ -21,7 +22,7 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, products.pr_type AS product_type, - _s5.ir + ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql index 268abbb3c..5a99bc7fc 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql @@ -7,7 +7,8 @@ WITH _s3 AS ( in_device_id ), _s5 AS ( SELECT - ROUND(CAST(COALESCE(SUM(COALESCE(_s3.n_rows, 0)), 0) AS REAL) / COUNT(*), 2) AS ir, + COUNT(*) AS n_rows, + SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, devices.de_product_id FROM main.devices AS devices JOIN main.products AS products @@ -21,7 +22,7 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, products.pr_type AS product_type, - _s5.ir + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql 
b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql index 1f8c08428..712284e0c 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql @@ -4,7 +4,7 @@ WITH _s14 AS ( FROM main.products WHERE pr_name = 'GoldCopper-Star' -), _t6 AS ( +), _s6 AS ( SELECT ca_dt FROM main.calendar @@ -19,7 +19,7 @@ WITH _s14 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t6 AS _s0 + FROM _s6 AS _s0 JOIN main.incidents AS incidents ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices @@ -32,7 +32,7 @@ WITH _s14 AS ( SELECT COUNT(*) AS n_rows, _s8.ca_dt - FROM _t6 AS _s8 + FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) JOIN _t8 AS _t10 @@ -43,14 +43,14 @@ WITH _s14 AS ( SELECT SUM(_s7.n_rows) AS sum_expr_4, SUM(_s13.n_rows) AS sum_n_rows, - EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) AS year - FROM _t6 AS _t6 + EXTRACT(YEAR FROM CAST(_s6.ca_dt AS DATETIME)) AS year + FROM _s6 AS _s6 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t6.ca_dt + ON _s6.ca_dt = _s7.ca_dt LEFT JOIN _s13 AS _s13 - ON _s13.ca_dt = _t6.ca_dt + ON _s13.ca_dt = _s6.ca_dt GROUP BY - EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) + EXTRACT(YEAR FROM CAST(_s6.ca_dt AS DATETIME)) ), _t0 AS ( SELECT ROUND( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql index 4611d884e..e49cda5d1 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql @@ -4,7 +4,7 @@ WITH _s14 AS ( FROM main.products WHERE pr_name = 'GoldCopper-Star' -), _t6 AS ( 
+), _s6 AS ( SELECT ca_dt FROM main.calendar @@ -19,7 +19,7 @@ WITH _s14 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t6 AS _s0 + FROM _s6 AS _s0 JOIN main.incidents AS incidents ON _s0.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices @@ -32,7 +32,7 @@ WITH _s14 AS ( SELECT COUNT(*) AS n_rows, _s8.ca_dt - FROM _t6 AS _s8 + FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE(devices.de_purchase_ts, 'start of day') JOIN _t8 AS _t10 @@ -43,14 +43,14 @@ WITH _s14 AS ( SELECT SUM(_s7.n_rows) AS sum_expr_4, SUM(_s13.n_rows) AS sum_n_rows, - CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) AS year - FROM _t6 AS _t6 + CAST(STRFTIME('%Y', _s6.ca_dt) AS INTEGER) AS year + FROM _s6 AS _s6 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t6.ca_dt + ON _s6.ca_dt = _s7.ca_dt LEFT JOIN _s13 AS _s13 - ON _s13.ca_dt = _t6.ca_dt + ON _s13.ca_dt = _s6.ca_dt GROUP BY - CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) + CAST(STRFTIME('%Y', _s6.ca_dt) AS INTEGER) ), _t0 AS ( SELECT ROUND( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql index a296d1e1b..337d437df 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql @@ -1,4 +1,4 @@ -WITH _t6 AS ( +WITH _s2 AS ( SELECT ca_dt FROM main.calendar @@ -6,7 +6,7 @@ WITH _t6 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t6 AS _s0 + FROM _s2 AS _s0 JOIN main.devices AS devices ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) GROUP BY @@ -15,7 +15,7 @@ WITH _t6 AS ( SELECT COUNT(*) AS n_rows, _s4.ca_dt - FROM _t6 AS _s4 + FROM _s2 AS _s4 JOIN main.incidents AS incidents ON _s4.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) GROUP BY @@ -24,14 +24,14 @@ WITH _t6 AS ( SELECT SUM(_s3.n_rows) AS 
sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, - EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) AS year - FROM _t6 AS _t6 + EXTRACT(YEAR FROM CAST(_s2.ca_dt AS DATETIME)) AS year + FROM _s2 AS _s2 LEFT JOIN _s3 AS _s3 - ON _s3.ca_dt = _t6.ca_dt + ON _s2.ca_dt = _s3.ca_dt LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t6.ca_dt + ON _s2.ca_dt = _s7.ca_dt GROUP BY - EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) + EXTRACT(YEAR FROM CAST(_s2.ca_dt AS DATETIME)) ), _t0 AS ( SELECT ROUND( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql index 783a358a4..61f47822c 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t6 AS ( +WITH _s2 AS ( SELECT ca_dt FROM main.calendar @@ -6,7 +6,7 @@ WITH _t6 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t6 AS _s0 + FROM _s2 AS _s0 JOIN main.devices AS devices ON _s0.ca_dt = DATE(devices.de_purchase_ts, 'start of day') GROUP BY @@ -15,7 +15,7 @@ WITH _t6 AS ( SELECT COUNT(*) AS n_rows, _s4.ca_dt - FROM _t6 AS _s4 + FROM _s2 AS _s4 JOIN main.incidents AS incidents ON _s4.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') GROUP BY @@ -24,14 +24,14 @@ WITH _t6 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, - CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) AS year - FROM _t6 AS _t6 + CAST(STRFTIME('%Y', _s2.ca_dt) AS INTEGER) AS year + FROM _s2 AS _s2 LEFT JOIN _s3 AS _s3 - ON _s3.ca_dt = _t6.ca_dt + ON _s2.ca_dt = _s3.ca_dt LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t6.ca_dt + ON _s2.ca_dt = _s7.ca_dt GROUP BY - CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) + CAST(STRFTIME('%Y', _s2.ca_dt) AS INTEGER) ), _t0 AS ( SELECT ROUND( diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index 
6659c086b..f76f36d96 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -12,7 +12,7 @@ WITH _s0 AS ( n_name = 'GERMANY' ), _s8 AS ( SELECT - COALESCE(SUM(partsupp.ps_supplycost * partsupp.ps_availqty), 0) * 0.0001 AS min_market_share + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_metric FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey @@ -20,22 +20,24 @@ WITH _s0 AS ( ON _s0.s_nationkey = _t4.n_nationkey ), _s9 AS ( SELECT - COALESCE(SUM(partsupp.ps_supplycost * partsupp.ps_availqty), 0) AS value, + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr_2, partsupp.ps_partkey FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t8 - ON _s4.s_nationkey = _t8.n_nationkey + JOIN _t4 AS _t7 + ON _s4.s_nationkey = _t7.n_nationkey GROUP BY partsupp.ps_partkey ) SELECT _s9.ps_partkey AS PS_PARTKEY, - _s9.value AS VALUE + COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON _s8.min_market_share < _s9.value + ON ( + COALESCE(_s8.sum_metric, 0) * 0.0001 + ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index 6659c086b..f76f36d96 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -12,7 +12,7 @@ WITH _s0 AS ( n_name = 'GERMANY' ), _s8 AS ( SELECT - COALESCE(SUM(partsupp.ps_supplycost * partsupp.ps_availqty), 0) * 0.0001 AS min_market_share + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_metric FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey @@ -20,22 +20,24 @@ WITH _s0 AS ( ON _s0.s_nationkey = _t4.n_nationkey ), _s9 AS ( SELECT - COALESCE(SUM(partsupp.ps_supplycost * partsupp.ps_availqty), 0) AS value, + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr_2, 
partsupp.ps_partkey FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t8 - ON _s4.s_nationkey = _t8.n_nationkey + JOIN _t4 AS _t7 + ON _s4.s_nationkey = _t7.n_nationkey GROUP BY partsupp.ps_partkey ) SELECT _s9.ps_partkey AS PS_PARTKEY, - _s9.value AS VALUE + COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON _s8.min_market_share < _s9.value + ON ( + COALESCE(_s8.sum_metric, 0) * 0.0001 + ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q15_ansi.sql b/tests/test_sql_refsols/tpch_q15_ansi.sql index b5292b051..8bee61545 100644 --- a/tests/test_sql_refsols/tpch_q15_ansi.sql +++ b/tests/test_sql_refsols/tpch_q15_ansi.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t6 AS ( SELECT l_discount, l_extendedprice, @@ -10,29 +10,29 @@ WITH _t5 AS ( AND l_shipdate >= CAST('1996-01-01' AS DATE) ), _s1 AS ( SELECT - COALESCE(SUM(l_extendedprice * ( + SUM(l_extendedprice * ( 1 - l_discount - )), 0) AS total_revenue, + )) AS sum_expr_2, l_suppkey - FROM _t5 + FROM _t6 GROUP BY l_suppkey ), _s2 AS ( SELECT - MAX(_s1.total_revenue) AS max_revenue + MAX(COALESCE(_s1.sum_expr_2, 0)) AS max_revenue FROM tpch.supplier AS supplier JOIN _s1 AS _s1 ON _s1.l_suppkey = supplier.s_suppkey ), _s5 AS ( SELECT - COALESCE(SUM(l_extendedprice * ( + SUM(l_extendedprice * ( 1 - l_discount - )), 0) AS total_revenue, - l_suppkey, + )) AS sum_expr_3, SUM(l_extendedprice * ( 1 - l_discount - )) AS sum_expr_3 - FROM _t5 + )) AS sum_expr_3_1, + l_suppkey + FROM _t6 GROUP BY l_suppkey ) @@ -41,11 +41,11 @@ SELECT supplier.s_name AS S_NAME, supplier.s_address AS S_ADDRESS, supplier.s_phone AS S_PHONE, - _s5.total_revenue AS TOTAL_REVENUE + COALESCE(_s5.sum_expr_3, 0) AS TOTAL_REVENUE FROM _s2 AS _s2 CROSS JOIN tpch.supplier AS supplier JOIN _s5 AS _s5 - ON _s2.max_revenue = COALESCE(_s5.sum_expr_3, 0) + ON _s2.max_revenue = COALESCE(_s5.sum_expr_3_1, 0) AND _s5.l_suppkey = 
supplier.s_suppkey ORDER BY s_suppkey diff --git a/tests/test_sql_refsols/tpch_q15_sqlite.sql b/tests/test_sql_refsols/tpch_q15_sqlite.sql index ecc0ebcf8..25340d6a8 100644 --- a/tests/test_sql_refsols/tpch_q15_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q15_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t6 AS ( SELECT l_discount, l_extendedprice, @@ -9,29 +9,29 @@ WITH _t5 AS ( l_shipdate < '1996-04-01' AND l_shipdate >= '1996-01-01' ), _s1 AS ( SELECT - COALESCE(SUM(l_extendedprice * ( + SUM(l_extendedprice * ( 1 - l_discount - )), 0) AS total_revenue, + )) AS sum_expr_2, l_suppkey - FROM _t5 + FROM _t6 GROUP BY l_suppkey ), _s2 AS ( SELECT - MAX(_s1.total_revenue) AS max_revenue + MAX(COALESCE(_s1.sum_expr_2, 0)) AS max_revenue FROM tpch.supplier AS supplier JOIN _s1 AS _s1 ON _s1.l_suppkey = supplier.s_suppkey ), _s5 AS ( SELECT - COALESCE(SUM(l_extendedprice * ( + SUM(l_extendedprice * ( 1 - l_discount - )), 0) AS total_revenue, - l_suppkey, + )) AS sum_expr_3, SUM(l_extendedprice * ( 1 - l_discount - )) AS sum_expr_3 - FROM _t5 + )) AS sum_expr_3_1, + l_suppkey + FROM _t6 GROUP BY l_suppkey ) @@ -40,11 +40,11 @@ SELECT supplier.s_name AS S_NAME, supplier.s_address AS S_ADDRESS, supplier.s_phone AS S_PHONE, - _s5.total_revenue AS TOTAL_REVENUE + COALESCE(_s5.sum_expr_3, 0) AS TOTAL_REVENUE FROM _s2 AS _s2 CROSS JOIN tpch.supplier AS supplier JOIN _s5 AS _s5 - ON _s2.max_revenue = COALESCE(_s5.sum_expr_3, 0) + ON _s2.max_revenue = COALESCE(_s5.sum_expr_3_1, 0) AND _s5.l_suppkey = supplier.s_suppkey ORDER BY s_suppkey diff --git a/tests/test_sql_refsols/tpch_q18_ansi.sql b/tests/test_sql_refsols/tpch_q18_ansi.sql index aa9134752..575feb447 100644 --- a/tests/test_sql_refsols/tpch_q18_ansi.sql +++ b/tests/test_sql_refsols/tpch_q18_ansi.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _s3 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS 
O_TOTALPRICE, - COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_s3.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS customer ON customer.c_custkey = orders.o_custkey -JOIN _t1 AS _t1 - ON NOT _t1.sum_l_quantity IS NULL - AND _t1.l_orderkey = orders.o_orderkey - AND _t1.sum_l_quantity > 300 +JOIN _s3 AS _s3 + ON NOT _s3.sum_l_quantity IS NULL + AND _s3.l_orderkey = orders.o_orderkey + AND _s3.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate diff --git a/tests/test_sql_refsols/tpch_q18_sqlite.sql b/tests/test_sql_refsols/tpch_q18_sqlite.sql index aa9134752..575feb447 100644 --- a/tests/test_sql_refsols/tpch_q18_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q18_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _s3 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS O_TOTALPRICE, - COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_s3.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS customer ON customer.c_custkey = orders.o_custkey -JOIN _t1 AS _t1 - ON NOT _t1.sum_l_quantity IS NULL - AND _t1.l_orderkey = orders.o_orderkey - AND _t1.sum_l_quantity > 300 +JOIN _s3 AS _s3 + ON NOT _s3.sum_l_quantity IS NULL + AND _s3.l_orderkey = orders.o_orderkey + AND _s3.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 6ee033739..64803c51d 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -1,6 +1,6 @@ WITH _s3 AS ( SELECT - COALESCE(SUM(l_quantity), 0) AS part_qty, + SUM(l_quantity) AS sum_l_quantity, l_partkey FROM tpch.lineitem WHERE @@ -10,7 +10,7 @@ WITH _s3 AS ( ), _s5 AS ( SELECT part.p_partkey, - _s3.part_qty + _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 ON 
_s3.l_partkey = part.p_partkey @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.part_qty, 0) + 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) GROUP BY partsupp.ps_suppkey diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index ff41af883..e5b221d69 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -1,6 +1,6 @@ WITH _s3 AS ( SELECT - COALESCE(SUM(l_quantity), 0) AS part_qty, + SUM(l_quantity) AS sum_l_quantity, l_partkey FROM tpch.lineitem WHERE @@ -10,7 +10,7 @@ WITH _s3 AS ( ), _s5 AS ( SELECT part.p_partkey, - _s3.part_qty + _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 ON _s3.l_partkey = part.p_partkey @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.part_qty, 0) + 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) GROUP BY partsupp.ps_suppkey From 1fff8eabaf640bb2f286db26bf4501099a27bc03 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sat, 12 Jul 2025 20:51:05 -0400 Subject: [PATCH 010/143] Fixed pullup bugs --- pydough/conversion/projection_pullup.py | 6 ++- pydough/sqlglot/sqlglot_relational_visitor.py | 20 +-------- .../aggregate_then_backref.txt | 11 ++--- tests/test_plan_refsols/bad_child_reuse_2.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 4 +- tests/test_plan_refsols/common_prefix_ad.txt | 21 +++++----- tests/test_plan_refsols/common_prefix_al.txt | 4 +- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_aq.txt | 18 ++++---- tests/test_plan_refsols/common_prefix_b.txt | 11 ++--- tests/test_plan_refsols/common_prefix_c.txt | 17 ++++---- tests/test_plan_refsols/common_prefix_d.txt | 29 ++++++++----- tests/test_plan_refsols/common_prefix_f.txt | 11 ++--- tests/test_plan_refsols/common_prefix_g.txt | 11 
++--- tests/test_plan_refsols/common_prefix_h.txt | 17 ++++---- tests/test_plan_refsols/common_prefix_j.txt | 9 ++-- tests/test_plan_refsols/common_prefix_k.txt | 9 ++-- tests/test_plan_refsols/common_prefix_l.txt | 31 +++++++------- tests/test_plan_refsols/common_prefix_m.txt | 32 +++++++------- tests/test_plan_refsols/common_prefix_o.txt | 21 +++++----- tests/test_plan_refsols/common_prefix_p.txt | 13 +++--- tests/test_plan_refsols/common_prefix_s.txt | 4 +- tests/test_plan_refsols/common_prefix_v.txt | 11 ++--- tests/test_plan_refsols/common_prefix_w.txt | 13 +++--- tests/test_plan_refsols/correl_14.txt | 23 +++++----- tests/test_plan_refsols/correl_15.txt | 30 ++++++------- tests/test_plan_refsols/correl_20.txt | 15 ++++--- tests/test_plan_refsols/correl_26.txt | 21 +++++----- tests/test_plan_refsols/correl_27.txt | 19 +++++---- tests/test_plan_refsols/correl_28.txt | 15 +++---- tests/test_plan_refsols/correl_29.txt | 33 ++++++++------- tests/test_plan_refsols/correl_30.txt | 23 +++++----- tests/test_plan_refsols/correl_31.txt | 27 ++++++------ tests/test_plan_refsols/correl_34.txt | 15 +++---- .../count_cust_supplier_nation_combos.txt | 13 +++--- .../epoch_culture_events_info.txt | 26 +++++++----- ...lineitems_access_cust_supplier_nations.txt | 17 ++++---- .../lines_shipping_vs_customer_region.txt | 22 +++++----- .../mostly_positive_accounts_per_nation3.txt | 17 ++++---- .../multi_partition_access_5.txt | 8 ++-- tests/test_plan_refsols/nation_best_order.txt | 4 +- .../num_positive_accounts_per_nation.txt | 13 +++--- .../test_plan_refsols/supplier_best_part.txt | 8 ++-- ...hnograph_incident_rate_by_release_year.txt | 9 ++-- .../technograph_monthly_incident_rate.txt | 29 ++++++------- ...umulative_incident_rate_goldcopperstar.txt | 25 +++++------ ..._year_cumulative_incident_rate_overall.txt | 15 +++---- tests/test_plan_refsols/tpch_q10.txt | 21 +++++----- tests/test_plan_refsols/tpch_q11.txt | 4 +- tests/test_plan_refsols/tpch_q18.txt | 4 +- 
tests/test_plan_refsols/tpch_q2.txt | 11 ++--- tests/test_plan_refsols/tpch_q20.txt | 15 +++---- tests/test_plan_refsols/tpch_q5.txt | 25 +++++------ .../window_filter_order_10.txt | 13 +++--- tests/test_relational_nodes_to_sqlglot.py | 42 +++++++++++++++++-- .../func_rank_with_filters_a_ansi.sql | 8 ++-- ...technograph_monthly_incident_rate_ansi.sql | 26 ++++++------ ...chnograph_monthly_incident_rate_sqlite.sql | 26 ++++++------ ...tive_incident_rate_goldcopperstar_ansi.sql | 10 ++--- ...ve_incident_rate_goldcopperstar_sqlite.sql | 10 ++--- tests/test_sql_refsols/tpch_q20_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 4 +- 62 files changed, 526 insertions(+), 455 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index cdfe2a553..5a575c249 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -41,11 +41,13 @@ def widen_columns( } substitutions: dict[RelationalExpression, RelationalExpression] = {} for input_idx in range(len(node.inputs)): + input_alias: str | None = node.default_input_aliases[input_idx] input_node: RelationalNode = node.inputs[input_idx] for name, expr in input_node.columns.items(): - expr = add_input_name(expr, node.default_input_aliases[input_idx]) + if isinstance(node, Join): + expr = add_input_name(expr, input_alias) ref_expr: ColumnReference = ColumnReference( - name, expr.data_type, input_name=node.default_input_aliases[input_idx] + name, expr.data_type, input_name=input_alias ) if expr not in existing_vals: new_name: str = name diff --git a/pydough/sqlglot/sqlglot_relational_visitor.py b/pydough/sqlglot/sqlglot_relational_visitor.py index e0b4b1475..5c3572dfd 100644 --- a/pydough/sqlglot/sqlglot_relational_visitor.py +++ b/pydough/sqlglot/sqlglot_relational_visitor.py @@ -39,7 +39,7 @@ ) from .sqlglot_helpers import get_glot_name, set_glot_alias, unwrap_alias -from .sqlglot_identifier_finder import find_identifiers, 
find_identifiers_in_list +from .sqlglot_identifier_finder import find_identifiers_in_list from .sqlglot_relational_expression_visitor import SQLGlotRelationalExpressionVisitor __all__ = ["SQLGlotRelationalVisitor"] @@ -455,23 +455,7 @@ def visit_filter(self, filter: Filter) -> None: # QUALIFY. query = self._build_subquery(query, exprs) else: - # TODO: (gh #151) Refactor a simpler way to check dependent expressions. - if ( - "group" in input_expr.args - or "distinct" in input_expr.args - or "where" in input_expr.args - or "qualify" in input_expr.args - or "order" in input_expr.args - or "limit" in input_expr.args - ): - # Check if we already have a where clause or limit. We - # cannot merge these yet. - # TODO: (gh #151) Consider allowing combining where if - # limit isn't present? - query = self._build_subquery(input_expr, exprs) - else: - # Try merge the column sections - query = self._merge_selects(exprs, input_expr, find_identifiers(cond)) + query = self._build_subquery(input_expr, exprs) query = query.where(cond) self._stack.append(query) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index 8fa56b648..5a6627a0f 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,7 +1,8 @@ ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': 
o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + PROJECT(columns={'o_orderkey_1': o_orderkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 1f5ca48a2..e1c4d902b 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], 
partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows, 'n_rows_1': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 1f5ca48a2..e1c4d902b 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': 
RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows, 'n_rows_1': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 2e01acf9c..2371e5b10 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,16 +1,17 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', qty_shipped)], orderings=[(s_name):asc_first]) PROJECT(columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric), 's_name': s_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == 
t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 'ps_suppkey_1': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': 
ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 11b514f75..12f4fa377 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -3,8 +3,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discou LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_no_tax_discount': n_no_tax_discount, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) PROJECT(columns={'c_custkey': c_custkey, 'n_no_tax_discount': DEFAULT_TO(n_rows, 0:numeric), 'n_orders': n_orders}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': 
n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 23c6a811a..126aa69c8 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': 
DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index 9f93ea84e..e8d037b92 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ b/tests/test_plan_refsols/common_prefix_aq.txt @@ -1,13 +1,15 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier', s_name), ('best_part', p_name), ('best_quantity', ps_availqty)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t1.s_name_1}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(n_name):asc_last], allow_ties=False) == 1:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) - FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 's_name_1': s_name, 's_nationkey_1': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) + FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 'ps_suppkey_1': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': 
t0.ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 4d1bb2447..6fdd01f9f 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -3,11 +3,12 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_cust SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0), 'n_suppliers': SUM(n_suppliers)}) PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'n_suppliers': t1.n_suppliers}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 
'n_rows_1': n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 34e0d2986..2c6a310e8 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -5,14 +5,15 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': 
SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 078ad6f20..79a8159e7 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -5,19 +5,26 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_agg_29': sum_agg_29, 'sum_n_rows_1': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) PROJECT(columns={'agg_1': 1:numeric, 'agg_29': agg_29, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 
'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_expr_10': t0.sum_expr_10_1, 'sum_expr_7': t0.sum_expr_7_1, 'sum_n_rows': t0.sum_n_rows_1}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_expr_10_1': sum_expr_10, 'sum_expr_7_1': sum_expr_7, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey_1, 'expr_10': t0.n_rows_1, 'expr_7': t0.expr_7_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'expr_7_1': expr_7, 'n_rows_1': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey_1, 'expr_7': 
t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey_1': c_nationkey, 'n_rows_1': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index dd8ca64e5..20bfbcac7 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -3,11 +3,12 @@ ROOT(columns=[('name', 
r_name), ('n_customers', n_customers), ('n_nations', sum_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_sum_n_rows': SUM(sum_n_rows)}) PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t1.sum_n_rows}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git 
a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 848a95bb5..4629f2fa8 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -3,11 +3,12 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(agg_2)}) PROJECT(columns={'agg_2': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'n_suppliers': t1.n_suppliers}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) 
AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index eb6de35e5..114ac4f25 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -5,14 +5,15 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders) PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_0': sum_agg_0, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_j.txt b/tests/test_plan_refsols/common_prefix_j.txt index a338a9b8e..db722c70a 100644 --- 
a/tests/test_plan_refsols/common_prefix_j.txt +++ b/tests/test_plan_refsols/common_prefix_j.txt @@ -1,7 +1,8 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name_1, 'r_name': t1.r_name_1}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_k.txt b/tests/test_plan_refsols/common_prefix_k.txt index 4c3a0abf6..28f7e96d9 100644 --- a/tests/test_plan_refsols/common_prefix_k.txt +++ b/tests/test_plan_refsols/common_prefix_k.txt @@ -1,7 +1,8 @@ ROOT(columns=[('cust_name', c_name), ('region_name', r_name), ('nation_name', n_name)], orderings=[(c_name):asc_first]) 
LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name_1, 'r_name': t1.r_name_1}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 407a9894e..5c8942e45 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,19 +1,20 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', n_selected_suppliers), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', selected_suppliers_avg), ('selected_suppliers_sum', selected_suppliers_sum)], orderings=[(c_name):asc_first]) 
PROJECT(columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal_1, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows_1, 'sum_s_acctbal': t1.sum_s_acctbal_1}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 
'r_regionkey': r_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows_1': n_rows, 'sum_s_acctbal_1': sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': 
r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 65b095b33..007c85b0b 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,19 +1,21 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', n_selected_suppliers), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', selected_suppliers_avg), ('selected_suppliers_sum', selected_suppliers_sum), ('nation_name', n_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': n_selected_suppliers, 
'selected_suppliers_avg': selected_suppliers_avg, 'selected_suppliers_sum': selected_suppliers_sum}, orderings=[(c_name):asc_first]) PROJECT(columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 
'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) + JOIN(condition=t0.n_regionkey_1 == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal_1, 'min_s_acctbal': t0.min_s_acctbal_1, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) + PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == 
t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 03557163c..4a9dff08f 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,8 +1,8 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), 
('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': 
n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) @@ -11,13 +11,14 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - 
JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + JOIN(condition=t0.l_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5_1, 'l_orderkey': t0.l_orderkey_1, 'l_suppkey': t0.l_suppkey_1, 'p_retailprice': t1.p_retailprice}) + PROJECT(columns={'agg_5_1': agg_5, 'l_orderkey_1': l_orderkey, 'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': 
p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 4678a87a4..78527ac1d 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,12 +1,13 @@ ROOT(columns=[('name', c_name), ('n_orders', n_orders), ('n_parts_ordered', n_parts_ordered), ('n_distinct_parts', n_distinct_parts)], orderings=[(ordering_3):asc_first, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_distinct_parts': n_distinct_parts, 'n_orders': n_orders, 'n_parts_ordered': n_parts_ordered, 'ordering_3': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) PROJECT(columns={'c_name': c_name, 'n_distinct_parts': DEFAULT_TO(ndistinct_l_partkey, 0:numeric), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_parts_ordered': DEFAULT_TO(n_rows_1, 0:numeric), 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': 
o_orderpriority}) + JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_name_1': c_name, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 9d8b97da5..dec2085d2 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', most_recent_order_total), ('most_recent_order_distinct', most_recent_order_distinct)], orderings=[(c_name):asc_first]) - FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate}) - PROJECT(columns={'c_name': c_name, 'most_recent_order_distinct': 
DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate': o_orderdate}) + FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name_1, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate_1}) + PROJECT(columns={'c_name_1': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate_1': o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_v.txt b/tests/test_plan_refsols/common_prefix_v.txt index 2c10e35a7..a1733f226 100644 --- a/tests/test_plan_refsols/common_prefix_v.txt +++ b/tests/test_plan_refsols/common_prefix_v.txt @@ -1,8 +1,9 @@ ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name_1}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_w.txt b/tests/test_plan_refsols/common_prefix_w.txt index 4b633dbd0..885853056 100644 --- a/tests/test_plan_refsols/common_prefix_w.txt +++ b/tests/test_plan_refsols/common_prefix_w.txt @@ -1,9 +1,10 @@ ROOT(columns=[('key', o_orderkey), ('cust_nation_name', n_name)], orderings=[(o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'o_orderkey': o_orderkey}, orderings=[(o_orderkey):asc_first]) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': 
t0.c_custkey, 'n_name': t1.n_name}) - FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + PROJECT(columns={'c_custkey_1': c_custkey, 'n_name_1': n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) + FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index feffdfb1a..d0f332474 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -2,16 +2,17 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': 
t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 6d7af3f9c..53149e564 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -2,19 +2,21 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, 
columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': 
p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price_1, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'global_avg_price_1': global_avg_price, 'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price_1, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'global_avg_price_1': global_avg_price, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + 
PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index 0887475a3..a480ec3e3 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,15 +1,14 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey}) - FILTER(condition=n_name == n_name, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, 
type=INNER, cardinality=SINGULAR_UNKNOWN, columns={}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index e7c7cde56..7aefdb4d0 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -3,16 +3,17 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_ JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 
'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) PROJECT(columns={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_selected_purchases': 1:numeric}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'o_orderkey_1': o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index ac9583af3..4ef8b131d 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -4,15 +4,16 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) 
AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_regionkey': t0.n_regionkey_1}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': 
n_nationkey, 'n_regionkey_1': n_regionkey, 'o_orderkey_1': o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 3f6839aaf..1ee36d030 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -4,13 +4,14 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, 
columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_regionkey': t0.n_regionkey_1}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'o_orderkey_1': o_orderkey}) + 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 29bf6bb22..9c6d69d8f 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,21 +1,22 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_above_avg_customers), ('n_above_avg_suppliers', n_above_avg_suppliers), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) PROJECT(columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'max_c_acctbal': max_c_acctbal, 'min_c_acctbal': min_c_acctbal, 'n_above_avg_customers': 
DEFAULT_TO(n_rows, 0:numeric), 'n_above_avg_suppliers': DEFAULT_TO(n_rows_1, 0:numeric)}) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) - JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) - FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': 
t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.anything_n_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name_1, 'anything_n_regionkey': t0.anything_n_regionkey_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'min_c_acctbal': t0.min_c_acctbal_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + PROJECT(columns={'anything_n_name_1': anything_n_name, 'anything_n_nationkey_1': anything_n_nationkey, 'anything_n_regionkey_1': anything_n_regionkey, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows': n_rows}) + JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) + FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 
'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) + FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 
'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 9f4248442..761246fb9 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -3,17 +3,18 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(region_name), 'n_rows': COUNT()}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': LOWER(r_name)}) FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': 
c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal_1, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) + PROJECT(columns={'avg_cust_acctbal_1': avg_cust_acctbal, 'n_name_1': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + 
SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index c868fedf4..1cfd16b15 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,17 +1,18 @@ ROOT(columns=[('nation_name', nation_name), ('mean_rev', mean_rev), ('median_rev', median_rev)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(revenue), 'median_rev': MEDIAN(revenue), 'nation_name': ANYTHING(n_name)}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey}) + PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name_1': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index 4ee636ab6..1bbb483bf 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ b/tests/test_plan_refsols/correl_34.txt @@ -5,14 +5,15 @@ ROOT(columns=[('n', n)], orderings=[]) JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_name_1': n_name, 's_suppkey_1': s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=l_linestatus == 'F':string & l_returnflag == 'N':string, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_linestatus': l_linestatus, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_returnflag': l_returnflag, 'l_suppkey': l_suppkey}) FILTER(condition=YEAR(o_orderdate) >= 1995:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index 21bb4c0e6..edd89b822 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -7,12 +7,13 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': year}, aggregations={'sum_agg_0': SUM(agg_0), 
'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) PROJECT(columns={'agg_0': 1:numeric, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'sum_l_extendedprice': sum_l_extendedprice, 'year': YEAR(o_orderdate)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + PROJECT(columns={'n_name_1': n_name, 'o_orderdate': o_orderdate, 'o_orderkey_1': o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index a7a2bb160..ef7fa9411 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,15 +1,19 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', event_year), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first]) LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': event_year, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) PROJECT(columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': YEAR(ev_dt), 's_name': s_name, 't_name': t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) - FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) - SCAN(table=EVENTS, 
columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + JOIN(condition=t0.ev_key_1 == t1.ev_key_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_name': t0.ev_name_1, 's_name': t0.s_name_1, 't_name': t1.t_name_1}) + PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key_1': ev_key, 'ev_name_1': ev_name, 's_name_1': s_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name_1, 's_name': t1.s_name_1}) + PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key': ev_key, 'ev_name_1': ev_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) + SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) + PROJECT(columns={'ev_key': ev_key, 's_name_1': s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 
's_month3': s_month3, 's_name': s_name}) + PROJECT(columns={'ev_key_1': ev_key, 't_name_1': t_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt index c9f1900fe..179adc4d0 100644 --- a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt +++ b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt @@ -1,12 +1,13 @@ ROOT(columns=[('ship_year', YEAR(l_shipdate)), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', l_extendedprice * 1.0:numeric - l_discount)], orderings=[]) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) - SCAN(table=tpch.LINEITEM, 
columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.l_orderkey_1 == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name_1}) + PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey_1': l_orderkey, 'l_shipdate': l_shipdate, 'n_name_1': n_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt index e06f975c8..6f090bc1b 100644 --- a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt +++ b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt @@ -1,14 +1,16 @@ ROOT(columns=[('order_year', YEAR(o_orderdate)), ('customer_region_name', r_name), ('customer_nation_name', n_name), ('supplier_region_name', supplier_region_name), ('nation_name', nation_name)], orderings=[]) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, 
type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.l_partkey_1 == t1.ps_partkey & t0.l_suppkey_1 == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name_1, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name_1, 'supplier_region_name': t1.r_name}) + PROJECT(columns={'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey, 'n_name_1': n_name, 'o_orderdate': o_orderdate, 'r_name_1': r_name}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name_1}) + PROJECT(columns={'n_name_1': n_name, 'o_orderdate': o_orderdate, 'o_orderkey_1': o_orderkey, 'r_name_1': r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index d185fd519..a57480e07 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,11 +1,12 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) - FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) - PROJECT(columns={'n_name': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name_1, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) + PROJECT(columns={'n_name_1': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name_1, 'total_suppliers': t1.total_suppliers}) + PROJECT(columns={'count_s_suppkey': count_s_suppkey, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 
's_suppkey': s_suppkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index bca9f137e..86f173ea6 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', n_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) - FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans_1, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId_1, 'sbTxType': sbTxType_1}) + PROJECT(columns={'n_ticker_trans_1': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId_1': sbTxTickerId, 'sbTxType_1': sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, 
columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) @@ -10,8 +10,8 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId_1, 'sbTxType': sbTxType_1}) + PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId_1': sbTxTickerId, 'sbTxType_1': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, 
aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index caee91153..738a980a1 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -4,8 +4,8 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) - PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name_1, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) + PROJECT(columns={'c_name_1': c_name, 'c_nationkey': c_nationkey, 'o_orderkey_1': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': 
t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt index 7706c56b5..21297f633 100644 --- a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt +++ b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt @@ -1,9 +1,10 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name_1, 'total_suppliers': t1.total_suppliers}) + PROJECT(columns={'count_s_suppkey': count_s_suppkey, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': 
t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 223d5aab8..734ac6e6f 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -5,10 +5,10 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows_1, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) - PROJECT(columns={'n_rows_1': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows_1, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - PROJECT(columns={'n_rows_1': n_rows, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) + FILTER(condition=RANKING(args=[], 
partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows_1, 'p_name': p_name_1, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) + PROJECT(columns={'n_rows_1': n_rows, 'p_name_1': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows_1, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey_1, 'sum_l_quantity': t0.sum_l_quantity}) + PROJECT(columns={'n_rows_1': n_rows, 'ps_partkey_1': ps_partkey, 'ps_suppkey_1': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) PROJECT(columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_rows_1': n_rows, 'sum_l_quantity': sum_l_quantity}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index acb036880..ae75e6956 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -9,8 +9,9 @@ ROOT(columns=[('year', release_year), ('ir', ir)], orderings=[(release_year):asc SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) PROJECT(columns={'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': 
t0.de_id, 'pr_release': t1.pr_release}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) + JOIN(condition=t0.de_id_1 == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) + PROJECT(columns={'de_id_1': de_id, 'pr_release': pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 6a96f0f3d..295a255e1 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -2,21 +2,22 @@ ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) PROJECT(columns={'ir': ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric), 'month': month, 'month_0': JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_3': expr_3, 'month': MONTH(ca_dt), 'n_rows': n_rows, 'year': year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), 
columns={'ca_dt': ca_dt, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows, 'year': t0.year_1}) + PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows, 'year_1': year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 
'calendar_day_1': t1.ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 191147ea3..7a7c31f77 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -8,18 +8,19 @@ ROOT(columns=[('years_since_release', years_since_release), ('cum_ir', cum_ir), SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_4': expr_4, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == 
DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index f4222569e..f3186c8d6 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -5,13 +5,14 @@ ROOT(columns=[('yr', year), ('cum_ir', cum_ir), ('pct_bought_change', pct_bought PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_3': expr_3, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, 
columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index f92f003ec..a77131552 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,14 +1,15 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', REVENUE), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal, 'c_address': 
c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - 
SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) + JOIN(condition=t0.c_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal_1, 'c_address': t0.c_address_1, 'c_comment': t0.c_comment_1, 'c_custkey': t0.c_custkey_1, 'c_name': t0.c_name_1, 'c_phone': t0.c_phone_1, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) + PROJECT(columns={'c_acctbal_1': c_acctbal, 'c_address_1': c_address, 'c_comment_1': c_comment, 'c_custkey_1': c_custkey, 'c_name_1': c_name, 'c_nationkey_1': c_nationkey, 'c_phone_1': c_phone, 'sum_expr_1': sum_expr_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + 
SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index e81f6331d..de30994c3 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,7 +1,7 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}, orderings=[(VALUE):desc_last]) - FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) - PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey, 'sum_metric': sum_metric}) + FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey_1}) + PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey_1': ps_partkey, 'sum_metric': sum_metric}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) PROJECT(columns={'metric': ps_supplycost * ps_availqty}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index 8de1daa53..d1bcc2be8 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,7 +1,7 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) LIMIT(limit=Literal(value=10, type=NumericType()), 
columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey_1, 'c_name': c_name_1, 'o_orderdate': o_orderdate_1, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice_1}) + PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey_1': c_custkey, 'c_name_1': c_name, 'o_orderdate_1': o_orderdate, 'o_orderkey_1': o_orderkey, 'o_totalprice_1': o_totalprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index bbad37875..06c5ad1a2 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -6,9 +6,10 @@ 
ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_phone': s_phone, 's_suppkey': s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, 
columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 81644d8d2..cce78a05f 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -8,11 +8,12 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(DEFAULT_TO(sum_l_quantity, 0:numeric), 0:numeric), columns={'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.p_partkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + PROJECT(columns={'p_partkey_1': p_partkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.p_partkey == 
t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index 59ff61eef..d7cf1c363 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -2,18 +2,19 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', REVENUE)], orderings=[(RE PROJECT(columns={'REVENUE': DEFAULT_TO(sum_value, 0:numeric), 'anything_n_name': anything_n_name}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey_1}) + PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name': n_name, 'n_nationkey_1': n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index 28100ecde..c436e164f 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,8 +1,9 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) - 
FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + FILTER(condition=o_totalprice_1 < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) + PROJECT(columns={'o_totalprice_1': o_totalprice}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) + FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_relational_nodes_to_sqlglot.py b/tests/test_relational_nodes_to_sqlglot.py index c992246ca..be5c9f52f 100644 --- a/tests/test_relational_nodes_to_sqlglot.py +++ b/tests/test_relational_nodes_to_sqlglot.py @@ -332,7 +332,15 @@ def mkglot_func(op: type[Expression], args: list[Expression]) -> Expression: Ident(this="a", quoted=False), Ident(this="b", quoted=False), ], - _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + _from=GlotFrom( + mkglot( + expressions=[ + Ident(this="a", quoted=False), + Ident(this="b", quoted=False), + ], + _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + ) + ), where=mkglot_func( EQ, [Ident(this="a", quoted=False), mk_literal(1, False)] ), @@ -379,7 +387,17 @@ def mkglot_func(op: type[Expression], args: list[Expression]) -> Expression: Ident(this="a", quoted=False), Ident(this="b", 
quoted=False), ], - _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + _from=GlotFrom( + mkglot( + expressions=[ + Ident(this="a", quoted=False), + Ident(this="b", quoted=False), + ], + _from=GlotFrom( + Table(this=Ident(this="table", quoted=False)) + ), + ) + ), where=mkglot_func( EQ, [Ident(this="a", quoted=False), mk_literal(1, False)] ), @@ -1419,7 +1437,15 @@ def mkglot_func(op: type[Expression], args: list[Expression]) -> Expression: where=mkglot_func( EQ, [Ident(this="a", quoted=False), mk_literal(1, False)] ), - _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + _from=GlotFrom( + mkglot( + expressions=[ + Ident(this="a", quoted=False), + Ident(this="b", quoted=False), + ], + _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + ) + ), ), id="root_after_filter", ), @@ -1781,7 +1807,15 @@ def test_expression_identifiers(expr: Expression, expected: set[Ident]) -> None: where=mkglot_func( EQ, [Ident(this="a", quoted=False), mk_literal(1, False)] ), - _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + _from=GlotFrom( + mkglot( + expressions=[ + Ident(this="a", quoted=False), + Ident(this="b", quoted=False), + ], + _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + ) + ), ), id="root_after_filter", ), diff --git a/tests/test_sql_refsols/func_rank_with_filters_a_ansi.sql b/tests/test_sql_refsols/func_rank_with_filters_a_ansi.sql index 322ee79e7..d2b5d7fcf 100644 --- a/tests/test_sql_refsols/func_rank_with_filters_a_ansi.sql +++ b/tests/test_sql_refsols/func_rank_with_filters_a_ansi.sql @@ -1,16 +1,14 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT RANK() OVER (ORDER BY a) AS r, a, b FROM table - WHERE - b = 0 ) SELECT a, b, r -FROM _t0 +FROM _t1 WHERE - r >= 3 + b = 0 AND r >= 3 diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index 593db1ec1..211f66449 100644 --- 
a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -4,7 +4,7 @@ WITH _t5 AS ( FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 2021) -), _t7 AS ( +), _t8 AS ( SELECT co_id, co_name @@ -14,29 +14,29 @@ WITH _t5 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _s0.ca_dt - FROM _t5 AS _s0 + _t7.ca_dt + FROM _t5 AS _t7 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATE_ADD(CAST(_s0.ca_dt AS TIMESTAMP), -6, 'MONTH') + ON calendar.ca_dt >= DATE_ADD(CAST(_t7.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t7 AS _t7 - ON _t7.co_id = devices.de_production_country_id + JOIN _t8 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _s0.ca_dt + _t7.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _s8.ca_dt - FROM _t5 AS _s8 + _t10.ca_dt + FROM _t5 AS _t10 JOIN main.incidents AS incidents - ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + ON _t10.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t9 - ON _t9.co_id = devices.de_production_country_id + JOIN _t8 AS _t11 + ON _t11.co_id = devices.de_production_country_id GROUP BY - _s8.ca_dt + _t10.ca_dt ) SELECT CONCAT_WS( diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index 581509700..e896b36ac 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -4,7 +4,7 @@ WITH _t5 AS ( FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t7 AS ( +), _t8 AS ( SELECT co_id, co_name @@ -14,29 +14,29 @@ WITH _t5 AS ( ), _s7 AS ( 
SELECT COUNT(*) AS n_rows, - _s0.ca_dt - FROM _t5 AS _s0 + _t7.ca_dt + FROM _t5 AS _t7 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATETIME(_s0.ca_dt, '-6 month') + ON calendar.ca_dt >= DATETIME(_t7.ca_dt, '-6 month') JOIN main.devices AS devices ON calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t7 AS _t7 - ON _t7.co_id = devices.de_production_country_id + JOIN _t8 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _s0.ca_dt + _t7.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _s8.ca_dt - FROM _t5 AS _s8 + _t10.ca_dt + FROM _t5 AS _t10 JOIN main.incidents AS incidents - ON _s8.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') + ON _t10.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t9 - ON _t9.co_id = devices.de_production_country_id + JOIN _t8 AS _t11 + ON _t11.co_id = devices.de_production_country_id GROUP BY - _s8.ca_dt + _t10.ca_dt ) SELECT CONCAT_WS( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql index 712284e0c..292b7386e 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t8 AS ( +), _t9 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t8 - ON _t8.pr_id = devices.de_product_id + JOIN _t9 AS _t9 + ON _t9.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE_TRUNC('DAY', 
CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t8 AS _t10 - ON _t10.pr_id = devices.de_product_id + JOIN _t9 AS _t11 + ON _t11.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql index e49cda5d1..4becabea9 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t8 AS ( +), _t9 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t8 - ON _t8.pr_id = devices.de_product_id + JOIN _t9 AS _t9 + ON _t9.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t8 AS _t10 - ON _t10.pr_id = devices.de_product_id + JOIN _t9 AS _t11 + ON _t11.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 64803c51d..52747c6e5 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -9,7 +9,7 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( SELECT - part.p_partkey, + part.p_partkey AS p_partkey_1, _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 @@ -22,7 +22,7 @@ WITH _s3 AS ( partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 - ON _s5.p_partkey = partsupp.ps_partkey + ON _s5.p_partkey_1 = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) diff 
--git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index e5b221d69..c0f053dfc 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -9,7 +9,7 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( SELECT - part.p_partkey, + part.p_partkey AS p_partkey_1, _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 @@ -22,7 +22,7 @@ WITH _s3 AS ( partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 - ON _s5.p_partkey = partsupp.ps_partkey + ON _s5.p_partkey_1 = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) From 07136d2a6838758bf947857abb8ee770c4c78f62 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sat, 12 Jul 2025 21:09:28 -0400 Subject: [PATCH 011/143] Pullup with LIMIT [RUN CI] --- pydough/conversion/merge_projects.py | 50 ++++++++--- pydough/conversion/projection_pullup.py | 55 +++++++++++- tests/test_plan_refsols/bad_child_reuse_1.txt | 8 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 17 ++-- tests/test_plan_refsols/bad_child_reuse_3.txt | 17 ++-- tests/test_plan_refsols/bad_child_reuse_4.txt | 19 ++-- tests/test_plan_refsols/bad_child_reuse_5.txt | 8 +- tests/test_plan_refsols/common_prefix_ad.txt | 32 ++++--- tests/test_plan_refsols/common_prefix_ag.txt | 89 +++++++++---------- tests/test_plan_refsols/common_prefix_ah.txt | 71 ++++++++------- tests/test_plan_refsols/common_prefix_ai.txt | 65 +++++++------- tests/test_plan_refsols/common_prefix_aj.txt | 89 +++++++++---------- tests/test_plan_refsols/common_prefix_ak.txt | 87 +++++++++--------- tests/test_plan_refsols/common_prefix_al.txt | 35 ++++---- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_an.txt | 45 +++++----- tests/test_plan_refsols/common_prefix_ao.txt | 55 ++++++------ tests/test_plan_refsols/common_prefix_c.txt | 45 +++++----- tests/test_plan_refsols/common_prefix_d.txt | 59 ++++++------ 
tests/test_plan_refsols/common_prefix_h.txt | 45 +++++----- tests/test_plan_refsols/common_prefix_i.txt | 23 +++-- tests/test_plan_refsols/common_prefix_l.txt | 39 ++++---- tests/test_plan_refsols/common_prefix_m.txt | 41 +++++---- tests/test_plan_refsols/common_prefix_n.txt | 49 +++++----- tests/test_plan_refsols/common_prefix_o.txt | 55 ++++++------ tests/test_plan_refsols/common_prefix_p.txt | 6 +- tests/test_plan_refsols/common_prefix_q.txt | 4 +- tests/test_plan_refsols/common_prefix_r.txt | 4 +- tests/test_plan_refsols/common_prefix_s.txt | 4 +- tests/test_plan_refsols/correl_1.txt | 15 ++-- tests/test_plan_refsols/correl_10.txt | 13 ++- tests/test_plan_refsols/correl_14.txt | 32 +++---- tests/test_plan_refsols/correl_15.txt | 40 +++++---- tests/test_plan_refsols/correl_17.txt | 9 +- tests/test_plan_refsols/correl_18.txt | 17 ++-- tests/test_plan_refsols/correl_2.txt | 27 +++--- tests/test_plan_refsols/correl_20.txt | 26 +++--- tests/test_plan_refsols/correl_24.txt | 14 ++- tests/test_plan_refsols/correl_29.txt | 59 ++++++------ tests/test_plan_refsols/correl_3.txt | 21 +++-- tests/test_plan_refsols/correl_32.txt | 4 +- .../cumulative_stock_analysis.txt | 12 ++- .../customer_largest_order_deltas.txt | 4 +- .../customer_most_recent_orders.txt | 4 +- tests/test_plan_refsols/datetime_relative.txt | 7 +- .../test_plan_refsols/deep_best_analysis.txt | 4 +- tests/test_plan_refsols/dumb_aggregation.txt | 11 ++- .../epoch_culture_events_info.txt | 37 ++++---- .../epoch_intra_season_searches.txt | 49 +++++----- .../epoch_pct_searches_per_tod.txt | 11 ++- .../epoch_search_results_by_tod.txt | 11 ++- .../epoch_unique_users_per_engine.txt | 19 ++-- tests/test_plan_refsols/exponentiation.txt | 6 +- tests/test_plan_refsols/floor_and_ceil_2.txt | 6 +- tests/test_plan_refsols/function_sampler.txt | 17 ++-- tests/test_plan_refsols/hour_minute_day.txt | 11 ++- .../minutes_seconds_datediff.txt | 9 +- .../month_year_sliding_windows.txt | 29 +++--- 
.../mostly_positive_accounts_per_nation3.txt | 4 +- .../multi_partition_access_5.txt | 8 +- .../multi_partition_access_6.txt | 53 +++++------ tests/test_plan_refsols/nation_best_order.txt | 4 +- .../test_plan_refsols/nation_window_aggs.txt | 7 +- .../test_plan_refsols/order_quarter_test.txt | 9 +- .../ordering_name_overload.txt | 5 +- .../orders_versus_first_orders.txt | 4 +- tests/test_plan_refsols/padding_functions.txt | 7 +- tests/test_plan_refsols/part_reduced_size.txt | 14 +-- .../parts_quantity_increase_95_96.txt | 6 +- tests/test_plan_refsols/prev_next_regions.txt | 5 +- .../rank_nations_per_region_by_customers.txt | 4 +- ...rank_parts_per_supplier_region_by_size.txt | 4 +- .../region_nation_window_aggs.txt | 11 ++- .../region_orders_from_nations_richest.txt | 21 +++-- tests/test_plan_refsols/sign.txt | 8 +- tests/test_plan_refsols/simple_cross_10.txt | 21 +++-- tests/test_plan_refsols/simple_cross_4.txt | 17 ++-- tests/test_plan_refsols/singular4.txt | 15 ++-- tests/test_plan_refsols/singular7.txt | 29 +++--- .../test_plan_refsols/supplier_best_part.txt | 4 +- .../supplier_pct_national_qty.txt | 6 +- .../test_plan_refsols/suppliers_bal_diffs.txt | 4 +- ...chnograph_country_combination_analysis.txt | 4 +- ...nograph_country_incident_rate_analysis.txt | 37 ++++---- ...aph_error_percentages_sun_set_by_error.txt | 21 +++-- ..._error_rate_sun_set_by_factory_country.txt | 25 +++--- ...hnograph_incident_rate_by_release_year.txt | 33 ++++--- .../technograph_incident_rate_per_brand.txt | 19 ++-- .../technograph_monthly_incident_rate.txt | 56 ++++++------ .../technograph_most_unreliable_products.txt | 4 +- ...umulative_incident_rate_goldcopperstar.txt | 50 +++++------ ..._year_cumulative_incident_rate_overall.txt | 32 ++++--- .../time_threshold_reached.txt | 4 +- ...top_5_nations_balance_by_num_suppliers.txt | 13 ++- .../test_plan_refsols/topk_order_by_calc.txt | 7 +- tests/test_plan_refsols/tpch_q1.txt | 11 ++- tests/test_plan_refsols/tpch_q10.txt | 4 +- 
tests/test_plan_refsols/tpch_q11.txt | 4 +- tests/test_plan_refsols/tpch_q12.txt | 15 ++-- tests/test_plan_refsols/tpch_q15.txt | 34 ++++--- tests/test_plan_refsols/tpch_q18.txt | 4 +- tests/test_plan_refsols/tpch_q20.txt | 21 ++--- tests/test_plan_refsols/tpch_q22.txt | 31 +++---- tests/test_plan_refsols/tpch_q5.txt | 39 ++++---- tests/test_plan_refsols/tpch_q7.txt | 31 ++++--- tests/test_plan_refsols/tpch_q9.txt | 33 ++++--- .../window_filter_order_10.txt | 4 +- .../window_sliding_frame_relsize.txt | 4 +- .../window_sliding_frame_relsum.txt | 4 +- .../years_months_days_hours_datediff.txt | 6 +- .../yoy_change_in_num_orders.txt | 9 +- .../cumulative_stock_analysis_ansi.sql | 70 +++++++-------- .../cumulative_stock_analysis_sqlite.sql | 70 +++++++-------- tests/test_sql_refsols/datediff_ansi.sql | 20 +++-- tests/test_sql_refsols/datediff_sqlite.sql | 20 +++-- .../defog_broker_adv3_ansi.sql | 4 +- .../defog_broker_adv3_sqlite.sql | 4 +- .../defog_broker_adv5_ansi.sql | 11 ++- .../defog_broker_adv5_sqlite.sql | 11 ++- .../defog_broker_basic3_ansi.sql | 24 +++-- .../defog_broker_basic3_sqlite.sql | 24 +++-- .../defog_dealership_adv5_ansi.sql | 26 ++---- .../defog_dealership_adv5_sqlite.sql | 26 ++---- .../defog_dealership_basic10_ansi.sql | 27 ++++-- .../defog_dealership_basic10_sqlite.sql | 27 ++++-- .../defog_dealership_basic5_ansi.sql | 27 ++++-- .../defog_dealership_basic5_sqlite.sql | 27 ++++-- .../defog_dealership_basic8_ansi.sql | 27 ++++-- .../defog_dealership_basic8_sqlite.sql | 27 ++++-- .../defog_ewallet_adv11_ansi.sql | 2 +- .../defog_ewallet_adv11_sqlite.sql | 2 +- .../defog_ewallet_basic10_ansi.sql | 24 +++-- .../defog_ewallet_basic10_sqlite.sql | 24 +++-- .../defog_ewallet_basic8_ansi.sql | 24 +++-- .../defog_ewallet_basic8_sqlite.sql | 24 +++-- .../epoch_culture_events_info_ansi.sql | 58 +++++++----- .../epoch_culture_events_info_sqlite.sql | 58 +++++++----- .../epoch_pct_searches_per_tod_ansi.sql | 14 +-- .../epoch_pct_searches_per_tod_sqlite.sql | 14 
+-- .../epoch_search_results_by_tod_ansi.sql | 17 ++-- .../epoch_search_results_by_tod_sqlite.sql | 17 ++-- .../floor_and_ceil_2_ansi.sql | 18 +++- .../floor_and_ceil_2_sqlite.sql | 48 +++++++--- ...ph_country_incident_rate_analysis_ansi.sql | 8 +- ..._country_incident_rate_analysis_sqlite.sql | 8 +- ...rror_percentages_sun_set_by_error_ansi.sql | 23 +++-- ...or_percentages_sun_set_by_error_sqlite.sql | 32 +++---- ...technograph_monthly_incident_rate_ansi.sql | 38 ++++---- ...chnograph_monthly_incident_rate_sqlite.sql | 38 ++++---- ...tive_incident_rate_goldcopperstar_ansi.sql | 73 +++++++-------- ...ve_incident_rate_goldcopperstar_sqlite.sql | 73 +++++++-------- ..._cumulative_incident_rate_overall_ansi.sql | 61 ++++++------- ...umulative_incident_rate_overall_sqlite.sql | 61 ++++++------- .../time_threshold_reached_ansi.sql | 6 +- .../time_threshold_reached_sqlite.sql | 6 +- tests/test_sql_refsols/tpch_q11_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q11_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q15_ansi.sql | 11 +-- tests/test_sql_refsols/tpch_q15_sqlite.sql | 11 +-- tests/test_sql_refsols/tpch_q5_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q5_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q9_ansi.sql | 55 +++++++----- tests/test_sql_refsols/tpch_q9_sqlite.sql | 55 +++++++----- 163 files changed, 1985 insertions(+), 1879 deletions(-) diff --git a/pydough/conversion/merge_projects.py b/pydough/conversion/merge_projects.py index 7a22b2cad..ca2d3f4d1 100644 --- a/pydough/conversion/merge_projects.py +++ b/pydough/conversion/merge_projects.py @@ -189,18 +189,44 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: while isinstance(node.input, Project): child_project: Project = node.input if isinstance(node, RelationalRoot): - # The columns of the projection can be sucked into the root - # above it if they are all pass-through/renamings, or if there - # is no convolution created (only allowed if there are no - # ordering expressions). 
- if all( - isinstance(expr, ColumnReference) - for expr in child_project.columns.values() - ) or ( - len(node.orderings) == 0 - and merging_doesnt_create_convolution( - node.columns, child_project.columns - ) + # # The columns of the projection can be sucked into the root + # # above it if they are all pass-through/renamings, or if there + # # is no convolution created (only allowed if there are no + # # ordering expressions). + # if all( + # isinstance(expr, ColumnReference) + # for expr in child_project.columns.values() + # ) or ( + # len(node.orderings) == 0 + # and merging_doesnt_create_convolution( + # node.columns, child_project.columns + # ) + # ): + # # Replace all column references in the root's columns with + # # the expressions from the child projection.. + # for idx, (name, expr) in enumerate(node.ordered_columns): + # new_expr = transpose_expression(expr, child_project.columns) + # node.columns[name] = new_expr + # node.ordered_columns[idx] = (name, new_expr) + # # Do the same with the sort expressions. + # for idx, sort_info in enumerate(node.orderings): + # new_expr = transpose_expression( + # sort_info.expr, child_project.columns + # ) + # node.orderings[idx] = ExpressionSortInfo( + # new_expr, sort_info.ascending, sort_info.nulls_first + # ) + # # Delete the child projection from the tree, replacing it + # # with its input. + # node._input = child_project.input + # else: + # # Otherwise, halt the merging process since it is no longer + # # possible to merge the children of this root into it. + # break + # TODO: ADD COMMENTS + if not ( + any(contains_window(expr) for expr in child_project.columns.values()) + and any(contains_window(expr) for expr in node.columns.values()) ): # Replace all column references in the root's columns with # the expressions from the child projection.. 
diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 5a575c249..768944058 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -9,9 +9,11 @@ from pydough.relational import ( ColumnReference, + ExpressionSortInfo, Filter, Join, JoinType, + Limit, Project, RelationalExpression, RelationalNode, @@ -178,6 +180,55 @@ def pull_project_into_filter(node: Filter) -> None: } +def pull_project_into_limit(node: Limit) -> None: + """ + TODO + """ + if not isinstance(node.input, Project): + return + + project: Project = node.input + + finder: ColumnReferenceFinder = ColumnReferenceFinder() + finder.reset() + for expr in node.columns.values(): + expr.accept(finder) + output_cols: set[ColumnReference] = finder.get_column_references() + output_names: set[str] = {col.name for col in output_cols} + + finder.reset() + for order_expr in node.orderings: + order_expr.expr.accept(finder) + order_cols: set[ColumnReference] = finder.get_column_references() + order_names: set[str] = {col.name for col in order_cols} + + transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( + widen_columns(project) + ) + substitutions: dict[RelationalExpression, RelationalExpression] = {} + for name, expr in project.columns.items(): + new_expr: RelationalExpression = apply_substitution( + expr, transfer_substitutions, {} + ) + if (not contains_window(new_expr)) and ( + (name in output_names) != (name in order_names) + ): + ref_expr: ColumnReference = ColumnReference(name, expr.data_type) + substitutions[ref_expr] = new_expr + node._columns = { + name: apply_substitution(expr, substitutions, {}) + for name, expr in node.columns.items() + } + node._orderings = [ + ExpressionSortInfo( + apply_substitution(order_expr.expr, substitutions, {}), + order_expr.ascending, + order_expr.nulls_first, + ) + for order_expr in node.orderings + ] + + def pullup_projections(node: RelationalNode) -> 
RelationalNode: """ TODO @@ -192,8 +243,8 @@ def pullup_projections(node: RelationalNode) -> RelationalNode: if node.join_type == JoinType.INNER: pull_project_into_join(node, 1) return pull_non_columns(node) - case Filter(): - pull_project_into_filter(node) + case Limit(): + pull_project_into_limit(node) return pull_non_columns(node) case _: return node diff --git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index c9a72f211..e5ed44130 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders)], orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': n_orders}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) +ROOT(columns=[('cust_key', c_custkey_1), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) + FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey_1, 'n_rows': n_rows}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey, 'n_rows': n_rows_1, 'n_rows_1': n_rows}, orderings=[(c_acctbal):desc_last]) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, 
aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index e1c4d902b..128fe3cf1 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,11 +1,10 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], 
order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index e1c4d902b..128fe3cf1 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,11 +1,10 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt b/tests/test_plan_refsols/bad_child_reuse_4.txt index 1205908a2..0bcaee7d1 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders)], orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': DEFAULT_TO(n_rows, 
0:numeric)}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': 
t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index fbf66f368..fe46e16ce 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders)], orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey_1, 'n_rows': n_rows}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, 
aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 2371e5b10..b4202ffb5 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,17 +1,15 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', qty_shipped)], orderings=[(s_name):asc_first]) - PROJECT(columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric), 's_name': s_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 'ps_suppkey_1': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': 
t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) +ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], 
order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index 425e256b7..a2ecdb1b4 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,45 +1,44 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_orders', n_machine_high_orders), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines), ('total_machine_high_domestic_revenue', total_machine_high_domestic_revenue)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 
0:numeric), 'total_machine_high_domestic_revenue': ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric)}) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': 
t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': 
o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == 
t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, 
columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': 
t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + 
JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index dc6fb7436..e5ee9d7a2 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,36 +1,35 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_machine_high_orders), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines), ('total_machine_high_domestic_revenue', total_machine_high_domestic_revenue)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_high_domestic_lines': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(n_rows, 0:numeric), 'total_machine_high_domestic_revenue': ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric)}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': 
t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == 
t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': 
o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, 
cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + 
FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index f1eba1ba7..d544d0cf7 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,33 +1,32 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines), ('total_machine_high_domestic_revenue', total_machine_high_domestic_revenue)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_n_rows, 0:numeric), 'total_machine_high_domestic_revenue': ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric)}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': 
SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 
'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - 
SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git 
a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index 40885cbd1..b6ea23a97 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,45 +1,44 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_orders', n_machine_high_orders), ('total_machine_high_domestic_revenue', total_machine_high_domestic_revenue)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 0:numeric), 'total_machine_high_domestic_revenue': ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric)}) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) 
- SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 
'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - 
SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], 
orderings=[(anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': 
t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': 
t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 694228447..036dab140 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,44 +1,43 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_orders', n_machine_high_orders), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': 
ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 
0:numeric))], orderings=[(anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': 
t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + 
SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 12f4fa377..c20c51d81 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,21 +1,20 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_no_tax_discount)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_no_tax_discount': t0.n_no_tax_discount, 'n_orders': t0.n_orders}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_no_tax_discount': n_no_tax_discount, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - PROJECT(columns={'c_custkey': c_custkey, 'n_no_tax_discount': DEFAULT_TO(n_rows, 0:numeric), 'n_orders': n_orders}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders_1, 'n_rows': t0.n_rows}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders_1': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': 
o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) FILTER(condition=n_rows > 0:numeric, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 126aa69c8..23c6a811a 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) + 
PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 6f23c6a66..6d114b0ce 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,23 +1,22 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_no_tax_discount)], orderings=[(c_custkey):asc_first]) - PROJECT(columns={'c_custkey': c_custkey, 'n_no_tax_discount': agg_1, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - LIMIT(limit=Literal(value=50, type=NumericType()), columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 
'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', agg_1)], orderings=[(c_custkey):asc_first]) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, 
columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + LIMIT(limit=Literal(value=50, type=NumericType()), columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) + 
FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 20ba145d3..4ac379581 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,28 +1,27 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_no_tax_discount), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_custkey': c_custkey, 'n_no_tax_discount': n_no_tax_discount, 'n_orders': n_orders, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - PROJECT(columns={'c_custkey': c_custkey, 'n_no_tax_discount': DEFAULT_TO(n_rows, 0:numeric), 'n_orders': DEFAULT_TO(agg_1, 0:numeric), 'sum_n_rows': sum_n_rows}) - FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) - LIMIT(limit=Literal(value=20, type=NumericType()), columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - 
LIMIT(limit=Literal(value=35, type=NumericType()), columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': 
o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric)), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) + FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) + LIMIT(limit=Literal(value=20, type=NumericType()), columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + LIMIT(limit=Literal(value=35, type=NumericType()), columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': 
n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 2c6a310e8..34dc71b2e 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,23 +1,22 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', n_orders), 
('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_nations': n_nations, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_suppliers': n_suppliers, 'r_name': r_name, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_sum_sum_expr_18_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_sum_sum_expr_18_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': 
sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) + 
PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 79a8159e7..cb55fd26f 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,30 +1,29 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', n_orders_94), ('n_orders_95', n_orders_95), ('n_orders_96', n_orders_96)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_nations': n_nations, 'n_orders_94': DEFAULT_TO(sum_sum_expr_7, 0:numeric), 'n_orders_95': DEFAULT_TO(sum_sum_expr_10, 0:numeric), 'n_orders_96': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_suppliers': n_suppliers, 'r_name': r_name, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_agg_29': sum_agg_29, 'sum_n_rows_1': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 
'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'agg_29': agg_29, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_expr_10': t0.sum_expr_10_1, 'sum_expr_7': t0.sum_expr_7_1, 'sum_n_rows': t0.sum_n_rows_1}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_expr_10_1': sum_expr_10, 'sum_expr_7_1': sum_expr_7, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey_1, 'expr_10': t0.n_rows_1, 'expr_7': t0.expr_7_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'expr_7_1': expr_7, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey_1, 'expr_7': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey_1': c_nationkey, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', DEFAULT_TO(sum_sum_expr_7, 0:numeric)), ('n_orders_95', DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 
'sum_agg_29': sum_agg_29, 'sum_n_rows_1': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + PROJECT(columns={'agg_1': 1:numeric, 'agg_29': agg_29, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_expr_10': t0.sum_expr_10_1, 'sum_expr_7': t0.sum_expr_7_1, 'sum_n_rows': t0.sum_n_rows_1}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_expr_10_1': sum_expr_10, 'sum_expr_7_1': sum_expr_7, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey_1, 'expr_10': t0.n_rows_1, 'expr_7': t0.expr_7_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'expr_7_1': expr_7, 'n_rows_1': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey_1, 'expr_7': t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey_1': c_nationkey, 'n_rows_1': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 114ac4f25..1a8005c46 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,23 +1,22 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_nations': n_nations, 'n_orders': 
DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'r_name': r_name, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0_1, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_0': sum_agg_0, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, 
aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0_1, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_0': sum_agg_0, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': 
sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': 
n_rows, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index 22743c469..5056ae384 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,12 +1,11 @@ -ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', n_selected_orders)], orderings=[(n_rows):desc_last, (n_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_rows': n_rows, 'n_selected_orders': n_selected_orders}, orderings=[(n_rows):desc_last, (n_name):asc_first]) - PROJECT(columns={'n_name': n_name, 'n_rows': n_rows, 'n_selected_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONTH(o_orderdate) == 12:numeric & 
YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) +ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(n_rows):desc_last, (n_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(n_rows):desc_last, (n_name):asc_first]) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONTH(o_orderdate) == 12:numeric & YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 5c8942e45..d8911515c 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,20 +1,19 @@ -ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', n_selected_suppliers), 
('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', selected_suppliers_avg), ('selected_suppliers_sum', selected_suppliers_sum)], orderings=[(c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal_1, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows_1, 'sum_s_acctbal': t1.sum_s_acctbal_1}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows_1': n_rows, 'sum_s_acctbal_1': sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal_1, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows_1, 'sum_s_acctbal': t1.sum_s_acctbal_1}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows_1': n_rows, 'sum_s_acctbal_1': sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 007c85b0b..24dfe2447 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,21 +1,20 @@ -ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', n_selected_suppliers), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', selected_suppliers_avg), ('selected_suppliers_sum', selected_suppliers_sum), ('nation_name', n_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': n_selected_suppliers, 'selected_suppliers_avg': selected_suppliers_avg, 'selected_suppliers_sum': selected_suppliers_sum}, orderings=[(c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.CUSTOMER, 
columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) - JOIN(condition=t0.n_regionkey_1 == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal_1, 'min_s_acctbal': t0.min_s_acctbal_1, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal_1), ('selected_suppliers_max', max_s_acctbal_1), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name_1)], orderings=[(c_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) + JOIN(condition=t0.n_regionkey_1 == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 
'max_s_acctbal': t0.max_s_acctbal_1, 'min_s_acctbal': t0.min_s_acctbal_1, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) + PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + 
FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 01c51b8e2..71e91f37d 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,27 +1,26 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': 
sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': 
l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 
'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 4a9dff08f..14fc1ed20 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,30 +1,29 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - 
FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5_1, 'l_orderkey': t0.l_orderkey_1, 'l_suppkey': t0.l_suppkey_1, 'p_retailprice': t1.p_retailprice}) - PROJECT(columns={'agg_5_1': agg_5, 'l_orderkey_1': l_orderkey, 'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': 
p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5_1, 'l_orderkey': t0.l_orderkey_1, 'l_suppkey': t0.l_suppkey_1, 'p_retailprice': t1.p_retailprice}) + PROJECT(columns={'agg_5_1': agg_5, 'l_orderkey_1': l_orderkey, 'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': 
l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 78527ac1d..9b9ef4933 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('name', c_name), ('n_orders', n_orders), ('n_parts_ordered', n_parts_ordered), ('n_distinct_parts', n_distinct_parts)], orderings=[(ordering_3):asc_first, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_distinct_parts': n_distinct_parts, 
'n_orders': n_orders, 'n_parts_ordered': n_parts_ordered, 'ordering_3': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'n_distinct_parts': DEFAULT_TO(ndistinct_l_partkey, 0:numeric), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_parts_ordered': DEFAULT_TO(n_rows_1, 0:numeric), 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) +ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(ordering_3_1):asc_first, (c_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3_1': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) PROJECT(columns={'c_custkey_1': c_custkey, 'c_name_1': c_name, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt b/tests/test_plan_refsols/common_prefix_q.txt index e7f6f5fd0..634dd4f69 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_spent', total_spent), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], 
orderings=[(total_spent):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice_1, 'max_p_name': max_p_name_1, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'max_l_extendedprice_1': max_l_extendedprice, 'max_p_name_1': max_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 355dc9ad3..32ebc4101 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_anything_l_extendedprice), ('total_spent', total_spent)], orderings=[(total_spent):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': 
max_anything_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice_1, 'max_anything_p_name': max_anything_p_name_1, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice_1': max_anything_anything_l_extendedprice, 'max_anything_p_name_1': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index dec2085d2..9d8b97da5 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', most_recent_order_total), ('most_recent_order_distinct', most_recent_order_distinct)], orderings=[(c_name):asc_first]) - FILTER(condition=most_recent_order_distinct < most_recent_order_total, 
columns={'c_name': c_name_1, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate_1}) - PROJECT(columns={'c_name_1': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate_1': o_orderdate}) + FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate}) + PROJECT(columns={'c_name': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate': o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index d77e7b9ff..a96b6f6c4 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('region_name', r_name), ('n_prefix_nations', n_prefix_nations)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_prefix_nations': DEFAULT_TO(n_rows, 0:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'r_regionkey': 
r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) +ROOT(columns=[('region_name', r_name), ('n_prefix_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_10.txt b/tests/test_plan_refsols/correl_10.txt index a2954ac48..6420d31d8 100644 --- a/tests/test_plan_refsols/correl_10.txt +++ b/tests/test_plan_refsols/correl_10.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('name', n_name), ('rname', NULL_4)], orderings=[(n_name):asc_first]) - PROJECT(columns={'NULL_4': None:unknown, 'n_name': n_name}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.n_name, None:unknown, 
1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('name', n_name), ('rname', None:unknown)], orderings=[(n_name):asc_first]) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index d0f332474..3e1783235 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,18 +1,20 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': 
sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) + PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) + FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric, 
columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + 
FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 53149e564..c94a329b1 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,22 +1,24 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price_1, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'global_avg_price_1': global_avg_price, 'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price_1, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'global_avg_price_1': global_avg_price, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 
'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) + PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) + FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric, columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + 
JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price_1, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'global_avg_price_1': global_avg_price, 'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price_1, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'global_avg_price_1': global_avg_price, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index 20bcf38c9..9fb817735 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ b/tests/test_plan_refsols/correl_17.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('fullname', fname)], orderings=[(fname):asc_first]) - PROJECT(columns={'fname': JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name))}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('fullname', JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name)))], orderings=[(JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name))):asc_first]) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 5f6ca684d..e34eb6923 100644 --- 
a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,11 +1,12 @@ ROOT(columns=[('n', DEFAULT_TO(sum_n_above_avg, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_n_above_avg': SUM(n_above_avg)}) AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_above_avg': COUNT()}) - FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) - FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) - AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=o_totalprice >= 0.5:numeric * total_price_sum, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + PROJECT(columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'total_price_sum': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, 
columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) + FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) + AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_2.txt b/tests/test_plan_refsols/correl_2.txt index 38a14ad9b..8b243cb8b 100644 --- a/tests/test_plan_refsols/correl_2.txt +++ b/tests/test_plan_refsols/correl_2.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('name', n_name), ('n_selected_custs', n_selected_custs)], orderings=[(n_name):asc_first]) - PROJECT(columns={'n_name': n_name, 'n_selected_custs': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_regionkey': t0.r_regionkey}) - FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=SLICE(t1.c_comment, None:unknown, 1:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) +ROOT(columns=[('name', n_name), ('n_selected_custs', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(n_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_regionkey': t0.r_regionkey}) + FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=SLICE(t1.c_comment, None:unknown, 1:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, 
None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index a480ec3e3..8c61c789f 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,14 +1,16 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 6:numeric & 
YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=domestic, columns={}) + PROJECT(columns={'domestic': name_16 == n_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'name_16': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff 
--git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index d179a88b7..eb1ec52c0 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -4,11 +4,9 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orde JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice_1, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice_1': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) - FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) - FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': 
o_totalprice}) + FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 9c6d69d8f..0c9acaa5b 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,30 +1,29 @@ -ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_above_avg_customers), ('n_above_avg_suppliers', n_above_avg_suppliers), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'max_c_acctbal': max_c_acctbal, 'min_c_acctbal': min_c_acctbal, 'n_above_avg_customers': DEFAULT_TO(n_rows, 0:numeric), 'n_above_avg_suppliers': DEFAULT_TO(n_rows_1, 0:numeric)}) - JOIN(condition=t0.anything_n_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name_1, 'anything_n_regionkey': t0.anything_n_regionkey_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'min_c_acctbal': t0.min_c_acctbal_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) - PROJECT(columns={'anything_n_name_1': anything_n_name, 'anything_n_nationkey_1': anything_n_nationkey, 'anything_n_regionkey_1': anything_n_regionkey, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows': n_rows}) - JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': 
t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) - FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) +ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', DEFAULT_TO(n_rows, 0:numeric)), ('n_above_avg_suppliers', DEFAULT_TO(n_rows_1, 0:numeric)), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) + JOIN(condition=t0.anything_n_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name_1, 'anything_n_regionkey': t0.anything_n_regionkey_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'min_c_acctbal': t0.min_c_acctbal_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + PROJECT(columns={'anything_n_name_1': anything_n_name, 'anything_n_nationkey_1': 
anything_n_nationkey, 'anything_n_regionkey_1': anything_n_regionkey, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows': n_rows}) + JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) + FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) + FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_3.txt b/tests/test_plan_refsols/correl_3.txt index 92aafe048..97b696fec 100644 --- a/tests/test_plan_refsols/correl_3.txt +++ b/tests/test_plan_refsols/correl_3.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('region_name', r_name), ('n_nations', n_nations)], orderings=[(r_name):asc_first]) - 
PROJECT(columns={'n_nations': DEFAULT_TO(n_rows, 0:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.anything_r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'anything_r_regionkey': anything_r_regionkey}, aggregations={'n_rows': COUNT()}) - AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_r_regionkey': ANYTHING(r_regionkey)}) - JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) +ROOT(columns=[('region_name', r_name), ('n_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.anything_r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'anything_r_regionkey': anything_r_regionkey}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_r_regionkey': ANYTHING(r_regionkey)}) + JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 
2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_32.txt b/tests/test_plan_refsols/correl_32.txt index e1c8d129c..a88f51fdb 100644 --- a/tests/test_plan_refsols/correl_32.txt +++ b/tests/test_plan_refsols/correl_32.txt @@ -1,6 +1,6 @@ ROOT(columns=[('customer_name', anything_c_name), ('delta', delta)], orderings=[(delta):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name, 'delta': delta}, orderings=[(delta):asc_first]) - PROJECT(columns={'anything_c_name': anything_c_name, 'delta': ABS(anything_c_acctbal - median_s_acctbal)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name_1, 'delta': delta}, orderings=[(delta):asc_first]) + PROJECT(columns={'anything_c_name_1': anything_c_name, 'delta': ABS(anything_c_acctbal - median_s_acctbal)}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_name': ANYTHING(c_name), 'median_s_acctbal': MEDIAN(s_acctbal)}) JOIN(condition=SLICE(t1.s_phone, -1:numeric, None:unknown, None:unknown) == SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 's_acctbal': t1.s_acctbal}) JOIN(condition=t0.n_regionkey == 
t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/cumulative_stock_analysis.txt b/tests/test_plan_refsols/cumulative_stock_analysis.txt index f0c0d24b5..79ad18038 100644 --- a/tests/test_plan_refsols/cumulative_stock_analysis.txt +++ b/tests/test_plan_refsols/cumulative_stock_analysis.txt @@ -1,7 +1,5 @@ -ROOT(columns=[('date_time', sbTxDateTime), ('txn_within_day', txn_within_day), ('n_buys_within_day', n_buys_within_day), ('pct_apple_txns', pct_apple_txns), ('share_change', share_change), ('rolling_avg_amount', rolling_avg_amount)], orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'n_buys_within_day': RELCOUNT(args=[KEEP_IF(sbTxType, sbTxType == 'buy':string)], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True), 'pct_apple_txns': ROUND(100.0:numeric * RELSUM(args=[ISIN(sbTickerSymbol, ['AAPL', 'AMZN']:array[unknown])], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric), 'rolling_avg_amount': ROUND(RELAVG(args=[sbTxAmount], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric), 'sbTxDateTime': sbTxDateTime, 'share_change': RELSUM(args=[IFF(sbTxType == 'buy':string, sbTxShares, 0:numeric - sbTxShares)], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 'txn_within_day': RELSIZE(args=[], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True)}) - PROJECT(columns={'sbTickerSymbol': sbTickerSymbol, 'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxType': sbTxType, 'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTickerSymbol': t1.sbTickerSymbol, 
'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType}) - FILTER(condition=MONTH(sbTxDateTime) == 4:numeric & YEAR(sbTxDateTime) == 2023:numeric & sbTxStatus == 'success':string, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxStatus': sbTxStatus, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) +ROOT(columns=[('date_time', sbTxDateTime), ('txn_within_day', RELSIZE(args=[], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(sbTxDateTime):asc_last], cumulative=True)), ('n_buys_within_day', RELCOUNT(args=[KEEP_IF(sbTxType, sbTxType == 'buy':string)], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(sbTxDateTime):asc_last], cumulative=True)), ('pct_apple_txns', ROUND(100.0:numeric * RELSUM(args=[ISIN(sbTickerSymbol, ['AAPL', 'AMZN']:array[unknown])], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric)), ('share_change', RELSUM(args=[IFF(sbTxType == 'buy':string, sbTxShares, 0:numeric - sbTxShares)], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True)), ('rolling_avg_amount', ROUND(RELAVG(args=[sbTxAmount], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric))], orderings=[(sbTxDateTime):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType}) + FILTER(condition=MONTH(sbTxDateTime) == 4:numeric & 
YEAR(sbTxDateTime) == 2023:numeric & sbTxStatus == 'success':string, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxStatus': sbTxStatus, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 1fa05bd28..b7b448977 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('largest_diff', largest_diff)], orderings=[(largest_diff):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) - PROJECT(columns={'c_name': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name_1, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) + PROJECT(columns={'c_name_1': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index 52700565c..a6322fc0c 100644 --- 
a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_recent_value', total_recent_value)], orderings=[(total_recent_value):desc_last]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) - PROJECT(columns={'c_name': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name_1, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) + PROJECT(columns={'c_name_1': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/datetime_relative.txt b/tests/test_plan_refsols/datetime_relative.txt index 995e3bd66..ddd91f4af 100644 --- a/tests/test_plan_refsols/datetime_relative.txt +++ b/tests/test_plan_refsols/datetime_relative.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('d1', d1), ('d2', d2), ('d3', d3), ('d4', d4), ('d5', d5), ('d6', d6)], orderings=[(o_orderdate):asc_first]) - PROJECT(columns={'d1': DATETIME(o_orderdate, 'Start of Year':string), 'd2': DATETIME(o_orderdate, 'START OF MONTHS':string), 'd3': DATETIME(o_orderdate, '-11 years':string, '+9 months':string, ' - 7 DaYs ':string, '+5 h':string, '-3 minutes':string, '+1 second':string), 'd4': DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of hour':string), 'd5': DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of minute':string), 'd6': DATETIME(Timestamp('2025-07-14 
12:58:45'):datetime, '+ 1000000 seconds':string), 'o_orderdate': o_orderdate}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_custkey):asc_first, (o_orderdate):asc_first]) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) +ROOT(columns=[('d1', DATETIME(o_orderdate, 'Start of Year':string)), ('d2', DATETIME(o_orderdate, 'START OF MONTHS':string)), ('d3', DATETIME(o_orderdate, '-11 years':string, '+9 months':string, ' - 7 DaYs ':string, '+5 h':string, '-3 minutes':string, '+1 second':string)), ('d4', DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of hour':string)), ('d5', DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of minute':string)), ('d6', DATETIME(Timestamp('2025-07-14 12:58:45'):datetime, '+ 1000000 seconds':string))], orderings=[(o_orderdate):asc_first]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_custkey):asc_first, (o_orderdate):asc_first]) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index b1b75726f..dedd53aad 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,6 +1,6 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', c_custkey), ('c_bal', c_acctbal), ('cr_bal', cr_bal), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', cg_key)], orderings=[(n_name):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cg_key': cg_key, 'cr_bal': cr_bal, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}, orderings=[(n_name):asc_first]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 
'cg_key': key_54, 'cr_bal': account_balance_21, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal_1, 'c_custkey': c_custkey_1, 'cg_key': key_54, 'cr_bal': account_balance_21, 'n_name': n_name, 'ps_availqty': ps_availqty_1, 'ps_partkey': ps_partkey_1, 'r_name': r_name_1, 's_suppkey': s_suppkey_1}, orderings=[(n_name):asc_first]) + PROJECT(columns={'account_balance_21': account_balance_21, 'c_acctbal_1': c_acctbal, 'c_custkey_1': c_custkey, 'key_54': key_54, 'n_name': n_name, 'ps_availqty_1': ps_availqty, 'ps_partkey_1': ps_partkey, 'r_name_1': r_name, 's_suppkey_1': s_suppkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'key_54': t1.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) diff --git a/tests/test_plan_refsols/dumb_aggregation.txt b/tests/test_plan_refsols/dumb_aggregation.txt index b178db074..2604e2675 100644 --- a/tests/test_plan_refsols/dumb_aggregation.txt +++ 
b/tests/test_plan_refsols/dumb_aggregation.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('nation_name', n_name), ('a1', r_name), ('a2', r_name), ('a3', a3), ('a4', a4), ('a5', a5), ('a6', r_regionkey), ('a7', r_name), ('a8', r_regionkey)], orderings=[(n_name):asc_first]) - PROJECT(columns={'a3': DEFAULT_TO(r_regionkey, 0:numeric), 'a4': IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric), 'a5': 1:numeric, 'n_name': n_name, 'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('nation_name', n_name), ('a1', r_name), ('a2', r_name), ('a3', DEFAULT_TO(r_regionkey, 0:numeric)), ('a4', IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric)), ('a5', 1:numeric), ('a6', r_regionkey), ('a7', r_name), ('a8', r_regionkey)], orderings=[(n_name):asc_first]) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) + LIMIT(limit=Literal(value=2, type=NumericType()), columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index ef7fa9411..5bbc46de5 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ 
b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,19 +1,18 @@ -ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', event_year), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first]) - LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': event_year, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) - PROJECT(columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': YEAR(ev_dt), 's_name': s_name, 't_name': t_name}) - JOIN(condition=t0.ev_key_1 == t1.ev_key_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_name': t0.ev_name_1, 's_name': t0.s_name_1, 't_name': t1.t_name_1}) - PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key_1': ev_key, 'ev_name_1': ev_name, 's_name_1': s_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name_1, 's_name': t1.s_name_1}) - PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key': ev_key, 'ev_name_1': ev_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) - FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - PROJECT(columns={'ev_key': ev_key, 's_name_1': s_name}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - PROJECT(columns={'ev_key_1': ev_key, 't_name_1': t_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) +ROOT(columns=[('event_name', ev_name_1), ('era_name', er_name_1), ('event_year', YEAR(ev_dt)), ('season_name', s_name_1), ('tod', t_name_1)], orderings=[(ev_dt):asc_first]) + LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name_1': er_name, 'ev_dt': ev_dt, 'ev_name_1': ev_name, 's_name_1': s_name, 't_name_1': t_name}, orderings=[(ev_dt):asc_first]) + JOIN(condition=t0.ev_key_1 == t1.ev_key_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_name': t0.ev_name_1, 's_name': t0.s_name_1, 't_name': t1.t_name_1}) + PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key_1': ev_key, 'ev_name_1': ev_name, 's_name_1': s_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name_1, 's_name': t1.s_name_1}) + PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key': ev_key, 'ev_name_1': ev_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) + 
SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) + SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) + PROJECT(columns={'ev_key': ev_key, 's_name_1': s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + PROJECT(columns={'ev_key_1': ev_key, 't_name_1': t_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 51069767e..a39171a35 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,26 +1,25 @@ -ROOT(columns=[('season_name', anything_s_name), ('pct_season_searches', pct_season_searches), ('pct_event_searches', pct_event_searches)], orderings=[(anything_s_name):asc_first]) - PROJECT(columns={'anything_s_name': anything_s_name, 'pct_event_searches': ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'pct_season_searches': ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)}) - JOIN(condition=t0.anything_s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 
'anything_s_name': t0.anything_s_name, 'n_rows': t1.n_rows, 'sum_is_intra_season': t1.sum_is_intra_season}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) - PROJECT(columns={'is_intra_season': DEFAULT_TO(n_rows, 0:numeric) > 0:numeric, 's_name': s_name}) - JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) - AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) - SCAN(table=EVENTS, columns={'ev_dt': 
ev_dt, 'ev_name': ev_name}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) - PROJECT(columns={'is_intra_season': name_9 == s_name, 's_name': s_name}) - JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.s_name, 's_name': t0.s_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) +ROOT(columns=[('season_name', anything_s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(anything_s_name):asc_first]) + JOIN(condition=t0.anything_s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'anything_s_name': t0.anything_s_name, 'n_rows': t1.n_rows, 'sum_is_intra_season': t1.sum_is_intra_season}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) + PROJECT(columns={'is_intra_season': DEFAULT_TO(n_rows, 
0:numeric) > 0:numeric, 's_name': s_name}) + JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) + AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) + PROJECT(columns={'is_intra_season': name_9 == s_name, 
's_name': s_name}) + JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.s_name, 's_name': t0.s_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt index 7cd2e96fe..50a303fbb 100644 --- a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt +++ b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('tod', anything_t_name), ('pct_searches', pct_searches)], orderings=[(anything_t_start_hour):asc_first]) - PROJECT(columns={'anything_t_name': anything_t_name, 'anything_t_start_hour': anything_t_start_hour, 'pct_searches': ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric)}) - AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_name': ANYTHING(t_name), 'anything_t_start_hour': ANYTHING(t_start_hour), 'n_rows': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) - 
SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) - SCAN(table=SEARCHES, columns={'search_ts': search_ts}) +ROOT(columns=[('tod', anything_t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) + AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_name': ANYTHING(t_name), 'anything_t_start_hour': ANYTHING(t_start_hour), 'n_rows': COUNT()}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=SEARCHES, columns={'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_search_results_by_tod.txt b/tests/test_plan_refsols/epoch_search_results_by_tod.txt index eee911c6b..c0edae245 100644 --- a/tests/test_plan_refsols/epoch_search_results_by_tod.txt +++ b/tests/test_plan_refsols/epoch_search_results_by_tod.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('tod', anything_t_name), ('pct_searches', pct_searches), ('avg_results', avg_results)], orderings=[(anything_t_start_hour):asc_first]) - PROJECT(columns={'anything_t_name': anything_t_name, 'anything_t_start_hour': anything_t_start_hour, 'avg_results': ROUND(avg_search_num_results, 2:numeric), 'pct_searches': ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric)}) - AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_name': ANYTHING(t_name), 'anything_t_start_hour': ANYTHING(t_start_hour), 'avg_search_num_results': AVG(search_num_results), 'n_rows': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_num_results': t1.search_num_results, 't_name': 
t0.t_name, 't_start_hour': t0.t_start_hour}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) - SCAN(table=SEARCHES, columns={'search_num_results': search_num_results, 'search_ts': search_ts}) +ROOT(columns=[('tod', anything_t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric)), ('avg_results', ROUND(avg_search_num_results, 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) + AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_name': ANYTHING(t_name), 'anything_t_start_hour': ANYTHING(t_start_hour), 'avg_search_num_results': AVG(search_num_results), 'n_rows': COUNT()}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_num_results': t1.search_num_results, 't_name': t0.t_name, 't_start_hour': t0.t_start_hour}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=SEARCHES, columns={'search_num_results': search_num_results, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt index 05fa11b24..133bb0087 100644 --- a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt +++ b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('engine', search_engine), ('n_users', n_users)], orderings=[(search_engine):asc_first]) - PROJECT(columns={'n_users': DEFAULT_TO(ndistinct_user_id, 0:numeric), 'search_engine': search_engine}) - JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ndistinct_user_id': t1.ndistinct_user_id, 'search_engine': t0.search_engine}) - AGGREGATE(keys={'search_engine': search_engine}, aggregations={}) - SCAN(table=SEARCHES, columns={'search_engine': search_engine}) - 
AGGREGATE(keys={'search_engine': search_engine}, aggregations={'ndistinct_user_id': NDISTINCT(user_id)}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'search_engine': t0.search_engine, 'user_id': t1.user_id}) - FILTER(condition=MONOTONIC(2010:numeric, YEAR(search_ts), 2019:numeric), columns={'search_engine': search_engine, 'search_user_id': search_user_id}) - SCAN(table=SEARCHES, columns={'search_engine': search_engine, 'search_ts': search_ts, 'search_user_id': search_user_id}) - SCAN(table=USERS, columns={'user_id': user_id}) +ROOT(columns=[('engine', search_engine), ('n_users', DEFAULT_TO(ndistinct_user_id, 0:numeric))], orderings=[(search_engine):asc_first]) + JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ndistinct_user_id': t1.ndistinct_user_id, 'search_engine': t0.search_engine}) + AGGREGATE(keys={'search_engine': search_engine}, aggregations={}) + SCAN(table=SEARCHES, columns={'search_engine': search_engine}) + AGGREGATE(keys={'search_engine': search_engine}, aggregations={'ndistinct_user_id': NDISTINCT(user_id)}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'search_engine': t0.search_engine, 'user_id': t1.user_id}) + FILTER(condition=MONOTONIC(2010:numeric, YEAR(search_ts), 2019:numeric), columns={'search_engine': search_engine, 'search_user_id': search_user_id}) + SCAN(table=SEARCHES, columns={'search_engine': search_engine, 'search_ts': search_ts, 'search_user_id': search_user_id}) + SCAN(table=USERS, columns={'user_id': user_id}) diff --git a/tests/test_plan_refsols/exponentiation.txt b/tests/test_plan_refsols/exponentiation.txt index 8c7272291..d87d0c19e 100644 --- a/tests/test_plan_refsols/exponentiation.txt +++ b/tests/test_plan_refsols/exponentiation.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('low_square', low_square), ('low_sqrt', low_sqrt), ('low_cbrt', low_cbrt)], 
orderings=[(low_square):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'low_cbrt': low_cbrt, 'low_sqrt': low_sqrt, 'low_square': low_square}, orderings=[(low_square):asc_first]) - PROJECT(columns={'low_cbrt': POWER(sbDpLow, 0.3333333333333333:numeric), 'low_sqrt': SQRT(sbDpLow), 'low_square': sbDpLow ** 2:numeric}) +ROOT(columns=[('low_square', low_square_1), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(low_square_1):asc_first]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'low_square_1': low_square, 'sbDpLow': sbDpLow}, orderings=[(low_square):asc_first]) + PROJECT(columns={'low_square': sbDpLow ** 2:numeric, 'sbDpLow': sbDpLow}) SCAN(table=main.sbDailyPrice, columns={'sbDpLow': sbDpLow}) diff --git a/tests/test_plan_refsols/floor_and_ceil_2.txt b/tests/test_plan_refsols/floor_and_ceil_2.txt index be5d28ade..2a4dcec3a 100644 --- a/tests/test_plan_refsols/floor_and_ceil_2.txt +++ b/tests/test_plan_refsols/floor_and_ceil_2.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', complete_parts), ('total_cost', total_cost)], orderings=[(total_cost):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'complete_parts': complete_parts, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': total_cost}, orderings=[(total_cost):desc_last]) - PROJECT(columns={'complete_parts': FLOOR(ps_availqty), 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': CEIL(ps_supplycost * FLOOR(ps_availqty))}) +ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', FLOOR(ps_availqty)), ('total_cost', total_cost_1)], orderings=[(total_cost_1):desc_last]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost_1': total_cost}, orderings=[(total_cost):desc_last]) + 
PROJECT(columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': CEIL(ps_supplycost * FLOOR(ps_availqty))}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/function_sampler.txt b/tests/test_plan_refsols/function_sampler.txt index fd7228904..b3bce5eaf 100644 --- a/tests/test_plan_refsols/function_sampler.txt +++ b/tests/test_plan_refsols/function_sampler.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('a', a), ('b', b), ('c', c), ('d', d), ('e', e), ('f', f)], orderings=[(c_address):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'a': a, 'b': b, 'c': c, 'c_address': c_address, 'd': d, 'e': e, 'f': f}, orderings=[(c_address):asc_first]) - PROJECT(columns={'a': JOIN_STRINGS('-':string, r_name, n_name, SLICE(c_name, 16:numeric, None:unknown, None:unknown)), 'b': ROUND(c_acctbal, 1:numeric), 'c': KEEP_IF(c_name, SLICE(c_phone, None:unknown, 1:numeric, None:unknown) == '3':string), 'c_address': c_address, 'd': PRESENT(KEEP_IF(c_name, SLICE(c_phone, 1:numeric, 2:numeric, None:unknown) == '1':string)), 'e': ABSENT(KEEP_IF(c_name, SLICE(c_phone, 14:numeric, None:unknown, None:unknown) == '7':string)), 'f': ROUND(c_acctbal)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=MONOTONIC(0.0:numeric, c_acctbal, 
100.0:numeric), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) +ROOT(columns=[('a', JOIN_STRINGS('-':string, r_name, n_name, SLICE(c_name, 16:numeric, None:unknown, None:unknown))), ('b', ROUND(c_acctbal, 1:numeric)), ('c', KEEP_IF(c_name, SLICE(c_phone, None:unknown, 1:numeric, None:unknown) == '3':string)), ('d', PRESENT(KEEP_IF(c_name, SLICE(c_phone, 1:numeric, 2:numeric, None:unknown) == '1':string))), ('e', ABSENT(KEEP_IF(c_name, SLICE(c_phone, 14:numeric, None:unknown, None:unknown) == '7':string))), ('f', ROUND(c_acctbal))], orderings=[(c_address):asc_first]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_address):asc_first]) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=MONOTONIC(0.0:numeric, c_acctbal, 100.0:numeric), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git 
a/tests/test_plan_refsols/hour_minute_day.txt b/tests/test_plan_refsols/hour_minute_day.txt index 0ad3b9efe..30ead1632 100644 --- a/tests/test_plan_refsols/hour_minute_day.txt +++ b/tests/test_plan_refsols/hour_minute_day.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('transaction_id', sbTxId), ('_expr0', _expr0), ('_expr1', _expr1), ('_expr2', _expr2)], orderings=[(sbTxId):asc_first]) - PROJECT(columns={'_expr0': HOUR(sbTxDateTime), '_expr1': MINUTE(sbTxDateTime), '_expr2': SECOND(sbTxDateTime), 'sbTxId': sbTxId}) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) - SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) - FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) +ROOT(columns=[('transaction_id', sbTxId), ('_expr0', HOUR(sbTxDateTime)), ('_expr1', MINUTE(sbTxDateTime)), ('_expr2', SECOND(sbTxDateTime))], orderings=[(sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) + SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) + FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/minutes_seconds_datediff.txt b/tests/test_plan_refsols/minutes_seconds_datediff.txt index b49585ab6..4ae64d60c 100644 --- a/tests/test_plan_refsols/minutes_seconds_datediff.txt +++ b/tests/test_plan_refsols/minutes_seconds_datediff.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('x', sbTxDateTime), 
('y', y), ('minutes_diff', minutes_diff), ('seconds_diff', seconds_diff)], orderings=[(sbTxDateTime):desc_last]) - LIMIT(limit=Literal(value=30, type=NumericType()), columns={'minutes_diff': minutes_diff, 'sbTxDateTime': sbTxDateTime, 'seconds_diff': seconds_diff, 'y': y}, orderings=[(sbTxDateTime):desc_last]) - PROJECT(columns={'minutes_diff': DATEDIFF('m':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime), 'sbTxDateTime': sbTxDateTime, 'seconds_diff': DATEDIFF('s':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime), 'y': datetime.datetime(2023, 4, 3, 13, 16, 30):datetime}) - FILTER(condition=YEAR(sbTxDateTime) <= 2024:numeric, columns={'sbTxDateTime': sbTxDateTime}) - SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) +ROOT(columns=[('x', sbTxDateTime), ('y', datetime.datetime(2023, 4, 3, 13, 16, 30):datetime), ('minutes_diff', DATEDIFF('m':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime)), ('seconds_diff', DATEDIFF('s':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime))], orderings=[(sbTxDateTime):desc_last]) + LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime}, orderings=[(sbTxDateTime):desc_last]) + FILTER(condition=YEAR(sbTxDateTime) <= 2024:numeric, columns={'sbTxDateTime': sbTxDateTime}) + SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index d438e6e1e..69a863b05 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,15 +1,16 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) - FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, 
(month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) - FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) - PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=month_total_spent > NEXT(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & 
month_total_spent > PREV(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) + PROJECT(columns={'month': month, 'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) + JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) + FILTER(condition=curr_year_total_spent > next_year_total_spent, columns={'year': year}) + PROJECT(columns={'curr_year_total_spent': DEFAULT_TO(sum_month_total_spent, 0:numeric), 'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) + PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt 
b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index a57480e07..e2d259395 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) - FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name_1, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) - PROJECT(columns={'n_name_1': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) + FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) + PROJECT(columns={'n_name': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name_1, 'total_suppliers': t1.total_suppliers}) PROJECT(columns={'count_s_suppkey': count_s_suppkey, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 86f173ea6..bca9f137e 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', n_type_trans)], 
orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) - FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans_1, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId_1, 'sbTxType': sbTxType_1}) - PROJECT(columns={'n_ticker_trans_1': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId_1': sbTxTickerId, 'sbTxType_1': sbTxType}) + FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + PROJECT(columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) @@ -10,8 +10,8 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': 
sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId_1, 'sbTxType': sbTxType_1}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId_1': sbTxTickerId, 'sbTxType_1': sbTxType}) + FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 5ee7193e5..05b282309 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -2,25 +2,26 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=n_ticker_type_trans == 1:numeric | n_cust_type_trans == 1:numeric, columns={'sbTxId': sbTxId}) 
JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t0.n_cust_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - FILTER(condition=DEFAULT_TO(sum_n_cust_type_trans, 0:numeric) > 1:numeric, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) + FILTER(condition=n_cust_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) + PROJECT(columns={'n_cust_trans': DEFAULT_TO(sum_n_cust_type_trans, 0:numeric), 'sbTxCustId': sbTxCustId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': 
t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, 
columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) @@ -40,19 +41,21 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + FILTER(condition=n_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) + PROJECT(columns={'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, 
aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) + PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': 
COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index 738a980a1..caee91153 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -4,8 +4,8 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name_1, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) - PROJECT(columns={'c_name_1': c_name, 'c_nationkey': c_nationkey, 'o_orderkey_1': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) + PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 
'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/nation_window_aggs.txt b/tests/test_plan_refsols/nation_window_aggs.txt index ce8dc7c0a..aebe1aef0 100644 --- a/tests/test_plan_refsols/nation_window_aggs.txt +++ b/tests/test_plan_refsols/nation_window_aggs.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('nation_name', n_name), ('key_sum', key_sum), ('key_avg', key_avg), ('n_short_comment', n_short_comment), ('n_nations', n_nations)], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) - PROJECT(columns={'key_avg': RELAVG(args=[n_nationkey], partition=[], order=[]), 'key_sum': RELSUM(args=[n_nationkey], partition=[], order=[]), 'n_name': n_name, 'n_nations': RELSIZE(args=[], partition=[], order=[]), 'n_regionkey': n_regionkey, 'n_short_comment': RELCOUNT(args=[KEEP_IF(n_comment, LENGTH(n_comment) < 75:numeric)], partition=[], order=[])}) - FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) +ROOT(columns=[('nation_name', n_name), ('key_sum', RELSUM(args=[n_nationkey], partition=[], order=[])), ('key_avg', RELAVG(args=[n_nationkey], partition=[], order=[])), ('n_short_comment', RELCOUNT(args=[KEEP_IF(n_comment, 
LENGTH(n_comment) < 75:numeric)], partition=[], order=[])), ('n_nations', RELSIZE(args=[], partition=[], order=[]))], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) + FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/order_quarter_test.txt b/tests/test_plan_refsols/order_quarter_test.txt index 78b65eda8..8666a4eba 100644 --- a/tests/test_plan_refsols/order_quarter_test.txt +++ b/tests/test_plan_refsols/order_quarter_test.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('order_date', o_orderdate), ('quarter', quarter), ('quarter_start', quarter_start), ('next_quarter', next_quarter), ('prev_quarter', prev_quarter), ('two_quarters_ahead', two_quarters_ahead), ('two_quarters_behind', two_quarters_behind), ('quarters_since_1995', quarters_since_1995), ('quarters_until_2000', quarters_until_2000), ('same_quarter_prev_year', same_quarter_prev_year), ('same_quarter_next_year', same_quarter_next_year)], orderings=[(o_orderdate):asc_first]) - PROJECT(columns={'next_quarter': DATETIME(o_orderdate, '+1 quarter':string), 'o_orderdate': o_orderdate, 'prev_quarter': DATETIME(o_orderdate, '-1 quarter':string), 'quarter': QUARTER(o_orderdate), 'quarter_start': DATETIME(o_orderdate, 'start of quarter':string), 'quarters_since_1995': DATEDIFF('quarter':string, '1995-01-01':string, o_orderdate), 'quarters_until_2000': DATEDIFF('quarter':string, o_orderdate, '2000-01-01':string), 'same_quarter_next_year': DATETIME(o_orderdate, '+4 quarters':string), 'same_quarter_prev_year': DATETIME(o_orderdate, '-4 quarters':string), 'two_quarters_ahead': DATETIME(o_orderdate, '+2 quarters':string), 'two_quarters_behind': DATETIME(o_orderdate, '-2 
quarters':string)}) - LIMIT(limit=Literal(value=1, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_first]) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) +ROOT(columns=[('order_date', o_orderdate), ('quarter', QUARTER(o_orderdate)), ('quarter_start', DATETIME(o_orderdate, 'start of quarter':string)), ('next_quarter', DATETIME(o_orderdate, '+1 quarter':string)), ('prev_quarter', DATETIME(o_orderdate, '-1 quarter':string)), ('two_quarters_ahead', DATETIME(o_orderdate, '+2 quarters':string)), ('two_quarters_behind', DATETIME(o_orderdate, '-2 quarters':string)), ('quarters_since_1995', DATEDIFF('quarter':string, '1995-01-01':string, o_orderdate)), ('quarters_until_2000', DATEDIFF('quarter':string, o_orderdate, '2000-01-01':string)), ('same_quarter_prev_year', DATETIME(o_orderdate, '-4 quarters':string)), ('same_quarter_next_year', DATETIME(o_orderdate, '+4 quarters':string))], orderings=[(o_orderdate):asc_first]) + LIMIT(limit=Literal(value=1, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_first]) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/ordering_name_overload.txt b/tests/test_plan_refsols/ordering_name_overload.txt index 655064a78..17c17a0d6 100644 --- a/tests/test_plan_refsols/ordering_name_overload.txt +++ b/tests/test_plan_refsols/ordering_name_overload.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('ordering_0', n_comment), ('ordering_1', n_name), ('ordering_2', n_nationkey), ('ordering_3', n_comment), ('ordering_4', n_nationkey), ('ordering_5', n_name), ('ordering_6', ordering_3), ('ordering_7', ordering_4), ('ordering_8', ordering_5)], orderings=[(ordering_3):asc_last, (ordering_4):desc_last, (ordering_5):asc_first]) - 
PROJECT(columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'ordering_3': LOWER(n_name), 'ordering_4': ABS(n_nationkey), 'ordering_5': LENGTH(n_comment)}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('ordering_0', n_comment), ('ordering_1', n_name), ('ordering_2', n_nationkey), ('ordering_3', n_comment), ('ordering_4', n_nationkey), ('ordering_5', n_name), ('ordering_6', LOWER(n_name)), ('ordering_7', ABS(n_nationkey)), ('ordering_8', LENGTH(n_comment))], orderings=[(LOWER(n_name)):asc_last, (ABS(n_nationkey)):desc_last, (LENGTH(n_comment)):asc_first]) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 254b7ce5c..7ac114d5f 100644 --- a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,6 +1,6 @@ ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', days_since_first_order)], orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'days_since_first_order': days_since_first_order, 'o_orderkey': o_orderkey}, orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'days_since_first_order': DATEDIFF('days':string, order_date_8, o_orderdate), 'o_orderkey': o_orderkey}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'days_since_first_order': days_since_first_order, 'o_orderkey': o_orderkey_1}, orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'days_since_first_order': DATEDIFF('days':string, order_date_8, o_orderdate), 'o_orderkey_1': o_orderkey}) 
JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/padding_functions.txt b/tests/test_plan_refsols/padding_functions.txt index 9c078877f..f511fb5a2 100644 --- a/tests/test_plan_refsols/padding_functions.txt +++ b/tests/test_plan_refsols/padding_functions.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('original_name', sbCustName), ('ref_rpad', ref_rpad), ('ref_lpad', ref_lpad), ('right_padded', right_padded), ('left_padded', left_padded), ('truncated_right', truncated_right), ('truncated_left', truncated_left), ('zero_pad_right', zero_pad_right), ('zero_pad_left', zero_pad_left), ('right_padded_space', right_padded_space), ('left_padded_space', left_padded_space)], orderings=[(sbCustName):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'left_padded': left_padded, 'left_padded_space': left_padded_space, 'ref_lpad': ref_lpad, 'ref_rpad': ref_rpad, 'right_padded': right_padded, 'right_padded_space': right_padded_space, 'sbCustName': sbCustName, 'truncated_left': truncated_left, 'truncated_right': truncated_right, 'zero_pad_left': zero_pad_left, 'zero_pad_right': zero_pad_right}, orderings=[(sbCustName):asc_first]) - PROJECT(columns={'left_padded': LPAD(sbCustName, 30:numeric, '#':string), 'left_padded_space': LPAD(sbCustName, 30:numeric, ' ':string), 'ref_lpad': LPAD('Cust0001':string, 30:numeric, '*':string), 'ref_rpad': RPAD('Cust0001':string, 30:numeric, '*':string), 'right_padded': RPAD(sbCustName, 30:numeric, '*':string), 
'right_padded_space': RPAD(sbCustName, 30:numeric, ' ':string), 'sbCustName': sbCustName, 'truncated_left': LPAD(sbCustName, 8:numeric, '-':string), 'truncated_right': RPAD(sbCustName, 8:numeric, '-':string), 'zero_pad_left': LPAD(sbCustName, 0:numeric, '.':string), 'zero_pad_right': RPAD(sbCustName, 0:numeric, '.':string)}) - SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) +ROOT(columns=[('original_name', sbCustName), ('ref_rpad', RPAD('Cust0001':string, 30:numeric, '*':string)), ('ref_lpad', LPAD('Cust0001':string, 30:numeric, '*':string)), ('right_padded', RPAD(sbCustName, 30:numeric, '*':string)), ('left_padded', LPAD(sbCustName, 30:numeric, '#':string)), ('truncated_right', RPAD(sbCustName, 8:numeric, '-':string)), ('truncated_left', LPAD(sbCustName, 8:numeric, '-':string)), ('zero_pad_right', RPAD(sbCustName, 0:numeric, '.':string)), ('zero_pad_left', LPAD(sbCustName, 0:numeric, '.':string)), ('right_padded_space', RPAD(sbCustName, 30:numeric, ' ':string)), ('left_padded_space', LPAD(sbCustName, 30:numeric, ' ':string))], orderings=[(sbCustName):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'sbCustName': sbCustName}, orderings=[(sbCustName):asc_first]) + SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index 3b22ff6b8..93d6f9723 100644 --- a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('reduced_size', reduced_size), ('retail_price_int', retail_price_int), ('message', message), ('discount', l_discount), ('date_dmy', date_dmy), ('date_md', date_md), ('am_pm', am_pm)], orderings=[(l_discount):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'am_pm': am_pm, 'date_dmy': date_dmy, 'date_md': date_md, 'l_discount': l_discount, 'message': message, 'reduced_size': reduced_size, 'retail_price_int': 
retail_price_int}, orderings=[(l_discount):desc_last]) - PROJECT(columns={'am_pm': STRING(l_receiptdate, '%H:%M%p':string), 'date_dmy': STRING(l_receiptdate, '%d-%m-%Y':string), 'date_md': STRING(l_receiptdate, '%m/%d':string), 'l_discount': l_discount, 'message': message, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'message': t0.message, 'reduced_size': t0.reduced_size, 'retail_price_int': t0.retail_price_int}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'message': message, 'p_partkey': p_partkey, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) - PROJECT(columns={'message': JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size)), 'p_partkey': p_partkey, 'reduced_size': FLOAT(p_size / 2.5:numeric), 'retail_price_int': INTEGER(p_retailprice)}) +ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int_1), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'l_discount': l_discount, 'l_receiptdate': l_receiptdate, 'p_size': p_size, 'retail_price_int_1': retail_price_int}, orderings=[(l_discount):desc_last]) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_size': t0.p_size, 'retail_price_int': t0.retail_price_int}) + PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int_1}) + LIMIT(limit=Literal(value=2, 
type=NumericType()), columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int_1': retail_price_int}, orderings=[(retail_price_int):asc_first]) + PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 391c0d4fb..4573cc866 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('name', p_name), ('qty_95', qty_95), ('qty_96', qty_96)], orderings=[(ordering_2):desc_last, (p_name):asc_first]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'ordering_2': ordering_2, 'p_name': p_name, 'qty_95': qty_95, 'qty_96': qty_96}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) - PROJECT(columns={'ordering_2': DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric), 'p_name': p_name, 'qty_95': DEFAULT_TO(sum_l_quantity, 0:numeric), 'qty_96': DEFAULT_TO(agg_1, 0:numeric)}) +ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(ordering_2_1):desc_last, (p_name):asc_first]) + LIMIT(limit=Literal(value=3, type=NumericType()), columns={'agg_1': agg_1, 'ordering_2_1': ordering_2, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) + PROJECT(columns={'agg_1': agg_1, 'ordering_2': DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric), 'p_name': p_name, 
'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/prev_next_regions.txt b/tests/test_plan_refsols/prev_next_regions.txt index 7b0786314..2c8e9b151 100644 --- a/tests/test_plan_refsols/prev_next_regions.txt +++ b/tests/test_plan_refsols/prev_next_regions.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('two_preceding', two_preceding), ('one_preceding', one_preceding), ('current', r_name), ('one_following', one_following), ('two_following', two_following)], orderings=[(r_name):asc_first]) - PROJECT(columns={'one_following': NEXT(args=[r_name], partition=[], order=[(r_name):asc_last]), 'one_preceding': PREV(args=[r_name], partition=[], order=[(r_name):asc_last]), 'r_name': r_name, 'two_following': PREV(args=[r_name], partition=[], order=[(r_name):asc_last], n=-2), 'two_preceding': PREV(args=[r_name], partition=[], order=[(r_name):asc_last], n=2)}) - SCAN(table=tpch.REGION, columns={'r_name': r_name}) +ROOT(columns=[('two_preceding', PREV(args=[r_name], partition=[], order=[(r_name):asc_last], n=2)), ('one_preceding', PREV(args=[r_name], partition=[], order=[(r_name):asc_last])), ('current', r_name), ('one_following', NEXT(args=[r_name], partition=[], order=[(r_name):asc_last])), ('two_following', PREV(args=[r_name], partition=[], order=[(r_name):asc_last], n=-2))], orderings=[(r_name):asc_first]) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 
720bf6ef4..085f35880 100644 --- a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('rank', rank)], orderings=[(rank):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'rank': rank}, orderings=[(rank):asc_first]) - PROJECT(columns={'n_name': n_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name_1, 'rank': rank}, orderings=[(rank):asc_first]) + PROJECT(columns={'n_name_1': n_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt index 9372d9bb7..c21a1b136 100644 --- a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt +++ b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt @@ -1,6 +1,6 @@ ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', rank)], orderings=[(p_partkey):asc_first]) - LIMIT(limit=Literal(value=15, type=NumericType()), columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': rank}, orderings=[(p_partkey):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True)}) + 
LIMIT(limit=Literal(value=15, type=NumericType()), columns={'p_partkey': p_partkey, 'r_name': r_name_1, 'rank': rank}, orderings=[(p_partkey):asc_first]) + PROJECT(columns={'p_partkey': p_partkey, 'r_name_1': r_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) diff --git a/tests/test_plan_refsols/region_nation_window_aggs.txt b/tests/test_plan_refsols/region_nation_window_aggs.txt index 60a4990f4..a748f74d8 100644 --- a/tests/test_plan_refsols/region_nation_window_aggs.txt +++ b/tests/test_plan_refsols/region_nation_window_aggs.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('nation_name', n_name), ('key_sum', key_sum), ('key_avg', key_avg), ('n_short_comment', n_short_comment), ('n_nations', n_nations)], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) - PROJECT(columns={'key_avg': RELAVG(args=[n_nationkey], partition=[n_regionkey], order=[]), 'key_sum': RELSUM(args=[n_nationkey], partition=[n_regionkey], order=[]), 'n_name': n_name, 'n_nations': RELSIZE(args=[], partition=[n_regionkey], order=[]), 'n_regionkey': n_regionkey, 'n_short_comment': RELCOUNT(args=[KEEP_IF(n_comment, LENGTH(n_comment) < 75:numeric)], partition=[n_regionkey], order=[])}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_comment': 
t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) +ROOT(columns=[('nation_name', n_name), ('key_sum', RELSUM(args=[n_nationkey], partition=[n_regionkey], order=[])), ('key_avg', RELAVG(args=[n_nationkey], partition=[n_regionkey], order=[])), ('n_short_comment', RELCOUNT(args=[KEEP_IF(n_comment, LENGTH(n_comment) < 75:numeric)], partition=[n_regionkey], order=[])), ('n_nations', RELSIZE(args=[], partition=[n_regionkey], order=[]))], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/region_orders_from_nations_richest.txt b/tests/test_plan_refsols/region_orders_from_nations_richest.txt index f9cbc020a..d8ec49ff3 100644 --- a/tests/test_plan_refsols/region_orders_from_nations_richest.txt +++ b/tests/test_plan_refsols/region_orders_from_nations_richest.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('region_name', r_name), ('n_orders', 
n_orders)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('region_name', r_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False) == 1:numeric, 
columns={'c_custkey': c_custkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/sign.txt b/tests/test_plan_refsols/sign.txt index 92ba41a82..7c2d6ac5c 100644 --- a/tests/test_plan_refsols/sign.txt +++ b/tests/test_plan_refsols/sign.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('high', sbDpHigh), ('high_neg', high_neg), ('high_zero', high_zero), ('sign_high', sign_high), ('sign_high_neg', sign_high_neg), ('sign_high_zero', sign_high_zero)], orderings=[(sbDpHigh):asc_first]) - PROJECT(columns={'high_neg': high_neg, 'high_zero': high_zero, 'sbDpHigh': sbDpHigh, 'sign_high': SIGN(sbDpHigh), 'sign_high_neg': SIGN(high_neg), 'sign_high_zero': SIGN(high_zero)}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'high_neg': high_neg, 'high_zero': high_zero, 'sbDpHigh': sbDpHigh}, orderings=[(sbDpHigh):asc_first]) - PROJECT(columns={'high_neg': -1:numeric * sbDpHigh, 'high_zero': 0:numeric * sbDpHigh, 'sbDpHigh': sbDpHigh}) - SCAN(table=main.sbDailyPrice, columns={'sbDpHigh': sbDpHigh}) +ROOT(columns=[('high', sbDpHigh), ('high_neg', -1:numeric * sbDpHigh), ('high_zero', 0:numeric * sbDpHigh), ('sign_high', SIGN(sbDpHigh)), ('sign_high_neg', SIGN(-1:numeric * sbDpHigh)), ('sign_high_zero', SIGN(0:numeric * sbDpHigh))], orderings=[(sbDpHigh):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'sbDpHigh': sbDpHigh}, orderings=[(sbDpHigh):asc_first]) + SCAN(table=main.sbDailyPrice, columns={'sbDpHigh': sbDpHigh}) diff --git 
a/tests/test_plan_refsols/simple_cross_10.txt b/tests/test_plan_refsols/simple_cross_10.txt index 7e068244a..7eb23ef93 100644 --- a/tests/test_plan_refsols/simple_cross_10.txt +++ b/tests/test_plan_refsols/simple_cross_10.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('region_name', r_name), ('n_other_nations', n_other_nations)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_other_nations': DEFAULT_TO(n_rows, 0:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) - FILTER(condition=name_3 != r_name, columns={'key_2': key_2, 'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'key_2': t1.r_regionkey, 'name_3': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) +ROOT(columns=[('region_name', r_name), ('n_other_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=SLICE(t1.n_name, 
None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) + FILTER(condition=name_3 != r_name, columns={'key_2': key_2, 'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'key_2': t1.r_regionkey, 'name_3': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_4.txt b/tests/test_plan_refsols/simple_cross_4.txt index c4d48ea2e..b2df7c11c 100644 --- a/tests/test_plan_refsols/simple_cross_4.txt +++ b/tests/test_plan_refsols/simple_cross_4.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('region_name', r_name), ('n_other_regions', n_other_regions)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_other_regions': DEFAULT_TO(n_rows, 0:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=name_3 != r_name & SLICE(name_3, None:unknown, 1:numeric, None:unknown) == SLICE(r_name, None:unknown, 1:numeric, None:unknown), columns={'r_regionkey': r_regionkey}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'name_3': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name}) +ROOT(columns=[('region_name', 
r_name), ('n_other_regions', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=name_3 != r_name & SLICE(name_3, None:unknown, 1:numeric, None:unknown) == SLICE(r_name, None:unknown, 1:numeric, None:unknown), columns={'r_regionkey': r_regionkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'name_3': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index 889ab189c..d32e6626b 100644 --- a/tests/test_plan_refsols/singular4.txt +++ b/tests/test_plan_refsols/singular4.txt @@ -1,8 +1,9 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) - FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name_1, 'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_last]) + PROJECT(columns={'c_name_1': c_name, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index bb1b832a9..4da08ecfe 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,15 +1,16 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_orders': n_orders, 'p_name': p_name, 's_name': s_name}, orderings=[(n_orders):desc_last, (s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) - FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': 
s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_orders': n_orders, 'p_name': p_name_1, 's_name': s_name}, orderings=[(n_orders):desc_last, (s_name):asc_first]) + PROJECT(columns={'n_orders': n_orders, 'p_name_1': p_name, 's_name': s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) + FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 
's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 734ac6e6f..f63e075d7 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -5,8 +5,8 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], 
partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows_1, 'p_name': p_name_1, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) - PROJECT(columns={'n_rows_1': n_rows, 'p_name_1': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) + PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows_1, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey_1, 'sum_l_quantity': t0.sum_l_quantity}) PROJECT(columns={'n_rows_1': n_rows, 'ps_partkey_1': ps_partkey, 'ps_suppkey_1': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index d7031fdb8..a37b6f7bc 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', supplier_quantity), ('national_qty_pct', national_qty_pct)], orderings=[(national_qty_pct):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'national_qty_pct': national_qty_pct, 's_name': s_name, 'supplier_quantity': supplier_quantity}, orderings=[(national_qty_pct):desc_last]) - PROJECT(columns={'n_name': n_name, 
'national_qty_pct': 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]), 's_name': s_name, 'supplier_quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) +ROOT(columns=[('supplier_name', s_name_1), ('nation_name', n_name_1), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', national_qty_pct_1)], orderings=[(national_qty_pct_1):desc_last]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name_1': n_name, 'national_qty_pct_1': national_qty_pct, 's_name_1': s_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(national_qty_pct):desc_last]) + PROJECT(columns={'n_name': n_name, 'national_qty_pct': 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]), 's_name': s_name, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/suppliers_bal_diffs.txt b/tests/test_plan_refsols/suppliers_bal_diffs.txt index e95879b55..916816a00 100644 --- a/tests/test_plan_refsols/suppliers_bal_diffs.txt +++ b/tests/test_plan_refsols/suppliers_bal_diffs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', s_name), ('region_name', r_name), ('acctbal_delta', acctbal_delta)], orderings=[(acctbal_delta):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'acctbal_delta': acctbal_delta, 'r_name': 
r_name, 's_name': s_name}, orderings=[(acctbal_delta):desc_last]) - PROJECT(columns={'acctbal_delta': s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]), 'r_name': r_name, 's_name': s_name}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'acctbal_delta': acctbal_delta, 'r_name': r_name_1, 's_name': s_name_1}, orderings=[(acctbal_delta):desc_last]) + PROJECT(columns={'acctbal_delta': s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]), 'r_name_1': r_name, 's_name_1': s_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index a380dffc1..0d29adc03 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,6 +1,6 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', purchase_country), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name, 'ir': ir, 'purchase_country': purchase_country}, orderings=[(ir):desc_last]) - PROJECT(columns={'co_name': co_name, 'ir': ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'purchase_country': name_2}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name_1, 'ir': ir, 'purchase_country': name_2}, orderings=[(ir):desc_last]) + 
PROJECT(columns={'co_name_1': co_name, 'ir': ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'name_2': name_2}) JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index ad8de3341..11675bca2 100644 --- a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -1,23 +1,22 @@ -ROOT(columns=[('country_name', co_name), ('made_ir', made_ir), ('sold_ir', sold_ir), ('user_ir', user_ir)], orderings=[(co_name):asc_first]) - PROJECT(columns={'co_name': co_name, 'made_ir': ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric), 'sold_ir': ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric), 'user_ir': ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric)}) - JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 
'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) - AGGREGATE(keys={'de_purchase_country_id': de_purchase_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) +ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) + JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) - AGGREGATE(keys={'us_country_id': us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) - JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) - SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) + AGGREGATE(keys={'de_purchase_country_id': de_purchase_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + AGGREGATE(keys={'us_country_id': us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': 
SUM(n_rows)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) + JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) + SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt index 6c3682229..b040a95ee 100644 --- a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt +++ b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('error', er_name), ('pct', pct)], orderings=[(pct):desc_last]) - PROJECT(columns={'er_name': er_name, 'pct': ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric)}) - JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) - SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) - AGGREGATE(keys={'in_error_id': in_error_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_error_id': t0.in_error_id}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - 
FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) +ROOT(columns=[('error', er_name), ('pct', ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[(ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric)):desc_last]) + JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) + SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) + AGGREGATE(keys={'in_error_id': in_error_id}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_error_id': t0.in_error_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 3ad6cf25b..89fe7388c 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('country', co_name), ('ir', ir)], orderings=[(co_name):asc_first]) - PROJECT(columns={'co_name': co_name, 'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)}) - JOIN(condition=t0.co_id == 
t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) +ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) + PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) + 
JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index ae75e6956..baa9bd355 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -1,17 +1,16 @@ -ROOT(columns=[('year', release_year), ('ir', ir)], orderings=[(release_year):asc_first]) - PROJECT(columns={'ir': ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric), 'release_year': release_year}) - JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) - AGGREGATE(keys={'release_year': release_year}, aggregations={'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'n_rows': n_rows, 'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.DEVICES, columns={'de_product_id': 
de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) - AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_id_1 == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) - PROJECT(columns={'de_id_1': de_id, 'pr_release': pr_release}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) +ROOT(columns=[('year', release_year), ('ir', ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric))], orderings=[(release_year):asc_first]) + JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) + AGGREGATE(keys={'release_year': release_year}, aggregations={'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'n_rows': n_rows, 'release_year': YEAR(pr_release)}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) + AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) + PROJECT(columns={'release_year': YEAR(pr_release)}) + JOIN(condition=t0.de_id_1 == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) + PROJECT(columns={'de_id_1': de_id, 'pr_release': 
pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index e44097202..5914783da 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('brand', pr_brand), ('ir', ir)], orderings=[(pr_brand):asc_first]) - PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand': pr_brand}) - AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_brand': pr_brand}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) +ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) + AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) + 
PROJECT(columns={'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_brand': pr_brand}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 295a255e1..e46df112f 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,30 +1,30 @@ -ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) - PROJECT(columns={'ir': ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric), 'month': month, 'month_0': JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_3': expr_3, 'month': MONTH(ca_dt), 'n_rows': n_rows, 'year': year}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows, 'year': t0.year_1}) - PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows, 'year_1': year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), 
columns={'ca_dt': ca_dt, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) +ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows_1, 'month': t0.month_1, 'n_rows': t1.n_rows, 'year': t0.year_1}) + PROJECT(columns={'ca_dt_1': ca_dt, 'month_1': month, 'n_rows_1': n_rows, 'year_1': year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) + 
FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'month': month, 'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.DEVICES, 
columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index df4147ff6..e2c60b089 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,6 +1,6 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}, orderings=[(ir):desc_last]) - PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 
0:numeric) / n_rows, 2:numeric), 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand_1, 'pr_name': pr_name_1, 'pr_type': pr_type_1}, orderings=[(ir):desc_last]) + PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand_1': pr_brand, 'pr_name_1': pr_name, 'pr_type_1': pr_type}) JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 7a7c31f77..3734ddf0e 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -1,30 +1,28 @@ -ROOT(columns=[('years_since_release', years_since_release), ('cum_ir', cum_ir), ('pct_bought_change', pct_bought_change), ('pct_incident_change', pct_incident_change), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(years_since_release):asc_first]) - PROJECT(columns={'cum_ir': ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric), 'n_devices': n_devices, 'n_incidents': n_incidents, 'pct_bought_change': ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], 
order=[(year):asc_last]), 2:numeric), 'pct_incident_change': ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric), 'years_since_release': year - YEAR(release_date)}) - PROJECT(columns={'n_devices': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_incidents': DEFAULT_TO(sum_expr_4, 0:numeric), 'release_date': release_date, 'year': year}) - FILTER(condition=YEAR(release_date) <= year, columns={'release_date': release_date, 'sum_expr_4': sum_expr_4, 'sum_n_rows': sum_n_rows, 'year': year}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year}) - AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) - SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_4': expr_4, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == 
DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) +ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_expr_4, 0:numeric) - PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', DEFAULT_TO(sum_n_rows, 0:numeric)), ('incidents', DEFAULT_TO(sum_expr_4, 0:numeric))], orderings=[(year - YEAR(release_date)):asc_first]) + FILTER(condition=YEAR(release_date) <= year, columns={'release_date': release_date, 'sum_expr_4': sum_expr_4, 'sum_n_rows': sum_n_rows, 'year': year}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year}) + AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) + 
FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) + SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) + AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'expr_4': expr_4, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': 
pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index f3186c8d6..c00cf0803 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,19 +1,17 @@ -ROOT(columns=[('yr', year), ('cum_ir', cum_ir), ('pct_bought_change', pct_bought_change), ('pct_incident_change', pct_incident_change), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(year):asc_first]) - PROJECT(columns={'cum_ir': ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric), 'n_devices': n_devices, 'n_incidents': n_incidents, 'pct_bought_change': ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric), 'pct_incident_change': ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric), 'year': year}) - 
PROJECT(columns={'n_devices': n_devices, 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) - FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) - PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_3': expr_3, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) +ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(year):asc_first]) + FILTER(condition=n_devices 
> 0:numeric, columns={'n_devices': n_devices, 'n_incidents': n_incidents, 'year': year}) + PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'expr_3': expr_3, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/time_threshold_reached.txt b/tests/test_plan_refsols/time_threshold_reached.txt index e83bd7ade..8b94ac8f2 100644 --- a/tests/test_plan_refsols/time_threshold_reached.txt +++ 
b/tests/test_plan_refsols/time_threshold_reached.txt @@ -1,7 +1,7 @@ ROOT(columns=[('date_time', sbTxDateTime)], orderings=[(sbTxDateTime):asc_first]) FILTER(condition=RANKING(args=[], partition=[txn_day], order=[(pct_of_day):asc_last], allow_ties=False) == 1:numeric, columns={'sbTxDateTime': sbTxDateTime}) - FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day_1}) - PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[txn_day], order=[]), 'sbTxDateTime': sbTxDateTime, 'txn_day_1': txn_day}) + FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) + PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[txn_day], order=[]), 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt index 01860bd68..404152d0e 100644 --- a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt +++ b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('name', n_name), ('total_bal', total_bal)], orderings=[(ordering_0):asc_last]) - PROJECT(columns={'n_name': n_name, 'ordering_0': ordering_0, 'total_bal': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) - 
LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'ordering_0': ordering_0, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(ordering_0):asc_last]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) +ROOT(columns=[('name', n_name), ('total_bal', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(ordering_0):asc_last]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'ordering_0': ordering_0, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(ordering_0):asc_last]) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/topk_order_by_calc.txt b/tests/test_plan_refsols/topk_order_by_calc.txt index 3c0f38516..58dcdcb9f 100644 --- a/tests/test_plan_refsols/topk_order_by_calc.txt +++ b/tests/test_plan_refsols/topk_order_by_calc.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('region_name', r_name), ('name_length', name_length)], orderings=[(r_name):asc_last]) - PROJECT(columns={'name_length': LENGTH(r_name), 'r_name': r_name}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_name': r_name}, orderings=[(r_name):asc_last]) - SCAN(table=tpch.REGION, 
columns={'r_name': r_name}) +ROOT(columns=[('region_name', r_name), ('name_length', LENGTH(r_name))], orderings=[(r_name):asc_last]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_name': r_name}, orderings=[(r_name):asc_last]) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/tpch_q1.txt b/tests/test_plan_refsols/tpch_q1.txt index a33019900..14e38c38f 100644 --- a/tests/test_plan_refsols/tpch_q1.txt +++ b/tests/test_plan_refsols/tpch_q1.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('L_RETURNFLAG', l_returnflag), ('L_LINESTATUS', l_linestatus), ('SUM_QTY', SUM_QTY), ('SUM_BASE_PRICE', SUM_BASE_PRICE), ('SUM_DISC_PRICE', SUM_DISC_PRICE), ('SUM_CHARGE', SUM_CHARGE), ('AVG_QTY', avg_l_quantity), ('AVG_PRICE', avg_l_extendedprice), ('AVG_DISC', avg_l_discount), ('COUNT_ORDER', n_rows)], orderings=[(l_returnflag):asc_first, (l_linestatus):asc_first]) - PROJECT(columns={'SUM_BASE_PRICE': DEFAULT_TO(sum_l_extendedprice, 0:numeric), 'SUM_CHARGE': DEFAULT_TO(sum_expr_8, 0:numeric), 'SUM_DISC_PRICE': DEFAULT_TO(sum_expr_9, 0:numeric), 'SUM_QTY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'avg_l_discount': avg_l_discount, 'avg_l_extendedprice': avg_l_extendedprice, 'avg_l_quantity': avg_l_quantity, 'l_linestatus': l_linestatus, 'l_returnflag': l_returnflag, 'n_rows': n_rows}) - AGGREGATE(keys={'l_linestatus': l_linestatus, 'l_returnflag': l_returnflag}, aggregations={'avg_l_discount': AVG(l_discount), 'avg_l_extendedprice': AVG(l_extendedprice), 'avg_l_quantity': AVG(l_quantity), 'n_rows': COUNT(), 'sum_expr_8': SUM(expr_8), 'sum_expr_9': SUM(expr_9), 'sum_l_extendedprice': SUM(l_extendedprice), 'sum_l_quantity': SUM(l_quantity)}) - PROJECT(columns={'expr_8': l_extendedprice * 1:numeric - l_discount * 1:numeric + l_tax, 'expr_9': l_extendedprice * 1:numeric - l_discount, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag}) - 
FILTER(condition=l_shipdate <= datetime.date(1998, 12, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_shipdate': l_shipdate, 'l_tax': l_tax}) +ROOT(columns=[('L_RETURNFLAG', l_returnflag), ('L_LINESTATUS', l_linestatus), ('SUM_QTY', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('SUM_BASE_PRICE', DEFAULT_TO(sum_l_extendedprice, 0:numeric)), ('SUM_DISC_PRICE', DEFAULT_TO(sum_expr_9, 0:numeric)), ('SUM_CHARGE', DEFAULT_TO(sum_expr_8, 0:numeric)), ('AVG_QTY', avg_l_quantity), ('AVG_PRICE', avg_l_extendedprice), ('AVG_DISC', avg_l_discount), ('COUNT_ORDER', n_rows)], orderings=[(l_returnflag):asc_first, (l_linestatus):asc_first]) + AGGREGATE(keys={'l_linestatus': l_linestatus, 'l_returnflag': l_returnflag}, aggregations={'avg_l_discount': AVG(l_discount), 'avg_l_extendedprice': AVG(l_extendedprice), 'avg_l_quantity': AVG(l_quantity), 'n_rows': COUNT(), 'sum_expr_8': SUM(expr_8), 'sum_expr_9': SUM(expr_9), 'sum_l_extendedprice': SUM(l_extendedprice), 'sum_l_quantity': SUM(l_quantity)}) + PROJECT(columns={'expr_8': l_extendedprice * 1:numeric - l_discount * 1:numeric + l_tax, 'expr_9': l_extendedprice * 1:numeric - l_discount, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag}) + FILTER(condition=l_shipdate <= datetime.date(1998, 12, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': 
l_quantity, 'l_returnflag': l_returnflag, 'l_shipdate': l_shipdate, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index a77131552..4ab824798 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', REVENUE), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}) + LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal_1, 'c_address': c_address_1, 'c_comment': c_comment_1, 'c_custkey': c_custkey, 'c_name': c_name_1, 'c_phone': c_phone_1, 'n_name': n_name_1}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) + PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal_1': c_acctbal, 'c_address_1': c_address, 'c_comment_1': c_comment, 'c_custkey': c_custkey, 'c_name_1': c_name, 'c_phone_1': c_phone, 'n_name_1': n_name}) JOIN(condition=t0.c_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal_1, 'c_address': t0.c_address_1, 'c_comment': t0.c_comment_1, 'c_custkey': t0.c_custkey_1, 'c_name': t0.c_name_1, 'c_phone': t0.c_phone_1, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) PROJECT(columns={'c_acctbal_1': c_acctbal, 'c_address_1': c_address, 
'c_comment_1': c_comment, 'c_custkey_1': c_custkey, 'c_name_1': c_name, 'c_nationkey_1': c_nationkey, 'c_phone_1': c_phone, 'sum_expr_1': sum_expr_1}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index de30994c3..207c69bc1 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,7 +1,7 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}, orderings=[(VALUE):desc_last]) - FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey_1}) - PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey_1': ps_partkey, 'sum_metric': sum_metric}) + FILTER(condition=VALUE > min_market_share, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) + PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'min_market_share': DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, 'ps_partkey': ps_partkey}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) PROJECT(columns={'metric': ps_supplycost * ps_availqty}) diff --git a/tests/test_plan_refsols/tpch_q12.txt b/tests/test_plan_refsols/tpch_q12.txt index 6a11fe1ab..7192a4e23 100644 --- a/tests/test_plan_refsols/tpch_q12.txt +++ b/tests/test_plan_refsols/tpch_q12.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('L_SHIPMODE', l_shipmode), ('HIGH_LINE_COUNT', 
HIGH_LINE_COUNT), ('LOW_LINE_COUNT', LOW_LINE_COUNT)], orderings=[(l_shipmode):asc_first]) - PROJECT(columns={'HIGH_LINE_COUNT': DEFAULT_TO(sum_is_high_priority, 0:numeric), 'LOW_LINE_COUNT': DEFAULT_TO(sum_expr_2, 0:numeric), 'l_shipmode': l_shipmode}) - AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(expr_2), 'sum_is_high_priority': SUM(is_high_priority)}) - PROJECT(columns={'expr_2': NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown])), 'is_high_priority': ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]), 'l_shipmode': l_shipmode}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) - FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('L_SHIPMODE', l_shipmode), ('HIGH_LINE_COUNT', DEFAULT_TO(sum_is_high_priority, 0:numeric)), ('LOW_LINE_COUNT', DEFAULT_TO(sum_expr_2, 0:numeric))], orderings=[(l_shipmode):asc_first]) + AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(expr_2), 'sum_is_high_priority': SUM(is_high_priority)}) + PROJECT(columns={'expr_2': NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown])), 'is_high_priority': ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]), 'l_shipmode': l_shipmode}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) + 
FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index 877cbcd22..ffb308cdf 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -1,18 +1,16 @@ -ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', TOTAL_REVENUE)], orderings=[(s_suppkey):asc_first]) - PROJECT(columns={'TOTAL_REVENUE': DEFAULT_TO(sum_expr_3, 0:numeric), 's_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) - JOIN(condition=DEFAULT_TO(t1.sum_expr_3_1, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'max_revenue': MAX(total_revenue)}) - PROJECT(columns={'total_revenue': DEFAULT_TO(sum_expr_2, 0:numeric)}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) - SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - 
PROJECT(columns={'expr_2': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) - PROJECT(columns={'l_suppkey': l_suppkey, 'sum_expr_3': sum_expr_3, 'sum_expr_3_1': sum_expr_3}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(expr_3)}) - PROJECT(columns={'expr_3': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) +ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', DEFAULT_TO(sum_expr_3, 0:numeric))], orderings=[(s_suppkey):asc_first]) + JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'max_revenue': MAX(total_revenue)}) + 
PROJECT(columns={'total_revenue': DEFAULT_TO(sum_expr_2, 0:numeric)}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) + SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(expr_2)}) + PROJECT(columns={'expr_2': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(expr_3)}) + PROJECT(columns={'expr_3': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index d1bcc2be8..8de1daa53 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,7 +1,7 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) LIMIT(limit=Literal(value=10, 
type=NumericType()), columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey_1, 'c_name': c_name_1, 'o_orderdate': o_orderdate_1, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice_1}) - PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey_1': c_custkey, 'c_name_1': c_name, 'o_orderdate_1': o_orderdate, 'o_orderkey_1': o_orderkey, 'o_totalprice_1': o_totalprice}) + FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index cce78a05f..3baa9fc80 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -7,13 +7,14 @@ 
ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(DEFAULT_TO(sum_l_quantity, 0:numeric), 0:numeric), columns={'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'p_partkey_1': p_partkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) + PROJECT(columns={'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric), 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': 
ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'p_partkey_1': p_partkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 06077eb2c..ff832eb90 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,15 +1,16 @@ -ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', TOTACCTBAL)], orderings=[(cntry_code):asc_first]) - PROJECT(columns={'TOTACCTBAL': DEFAULT_TO(sum_c_acctbal, 0:numeric), 'cntry_code': cntry_code, 'n_rows': n_rows}) - AGGREGATE(keys={'cntry_code': cntry_code}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'cntry_code': cntry_code}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'cntry_code': t0.cntry_code, 'n_rows': t1.n_rows}) - FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': cntry_code}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) - 
FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) - AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) - FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) + AGGREGATE(keys={'cntry_code': cntry_code}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'cntry_code': cntry_code}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'cntry_code': t0.cntry_code, 'n_rows': t1.n_rows}) + FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': cntry_code}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) + FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + JOIN(condition=True:bool, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) + AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) + FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) + PROJECT(columns={'c_acctbal': c_acctbal, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) + FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index d7cf1c363..99e4bd091 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,20 +1,19 @@ -ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', REVENUE)], orderings=[(REVENUE):desc_last]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_value, 0:numeric), 'anything_n_name': anything_n_name}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey_1}) - PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name': n_name, 'n_nationkey_1': n_nationkey}) - JOIN(condition=t0.o_orderkey 
== t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(DEFAULT_TO(sum_value, 
0:numeric)):desc_last]) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) + PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) + JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey_1}) + PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name': n_name, 'n_nationkey_1': n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q7.txt b/tests/test_plan_refsols/tpch_q7.txt index a2698fc77..e721ef7c6 100644 --- a/tests/test_plan_refsols/tpch_q7.txt +++ b/tests/test_plan_refsols/tpch_q7.txt @@ -1,17 +1,16 @@ -ROOT(columns=[('SUPP_NATION', n_name), ('CUST_NATION', cust_nation), ('L_YEAR', l_year), ('REVENUE', REVENUE)], orderings=[(n_name):asc_first, (cust_nation):asc_first, (l_year):asc_first]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_volume, 0:numeric), 'cust_nation': cust_nation, 'l_year': l_year, 'n_name': n_name}) - AGGREGATE(keys={'cust_nation': cust_nation, 'l_year': l_year, 'n_name': n_name}, aggregations={'sum_volume': SUM(volume)}) - PROJECT(columns={'cust_nation': name_8, 'l_year': YEAR(l_shipdate), 'n_name': n_name, 'volume': l_extendedprice * 1:numeric - l_discount}) - FILTER(condition=n_name == 'FRANCE':string & name_8 == 'GERMANY':string | n_name == 'GERMANY':string & name_8 == 'FRANCE':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name, 'name_8': name_8}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t0.n_name, 'name_8': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 
'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) +ROOT(columns=[('SUPP_NATION', n_name), ('CUST_NATION', cust_nation), ('L_YEAR', l_year), ('REVENUE', DEFAULT_TO(sum_volume, 0:numeric))], orderings=[(n_name):asc_first, (cust_nation):asc_first, (l_year):asc_first]) + AGGREGATE(keys={'cust_nation': cust_nation, 'l_year': l_year, 'n_name': n_name}, aggregations={'sum_volume': SUM(volume)}) + PROJECT(columns={'cust_nation': name_8, 'l_year': YEAR(l_shipdate), 'n_name': n_name, 'volume': l_extendedprice * 1:numeric - l_discount}) + FILTER(condition=n_name == 'FRANCE':string & name_8 == 'GERMANY':string | n_name == 'GERMANY':string & name_8 == 'FRANCE':string, columns={'l_discount': l_discount, 'l_extendedprice': 
l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name, 'name_8': name_8}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t0.n_name, 'name_8': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q9.txt b/tests/test_plan_refsols/tpch_q9.txt index 23af1d803..8abc81dc2 100644 --- 
a/tests/test_plan_refsols/tpch_q9.txt +++ b/tests/test_plan_refsols/tpch_q9.txt @@ -1,17 +1,16 @@ -ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', AMOUNT)], orderings=[(n_name):asc_first, (o_year):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'AMOUNT': AMOUNT, 'n_name': n_name, 'o_year': o_year}, orderings=[(n_name):asc_first, (o_year):desc_last]) - PROJECT(columns={'AMOUNT': DEFAULT_TO(sum_value, 0:numeric), 'n_name': n_name, 'o_year': o_year}) - AGGREGATE(keys={'n_name': n_name, 'o_year': o_year}, aggregations={'sum_value': SUM(value)}) - PROJECT(columns={'n_name': n_name, 'o_year': YEAR(o_orderdate), 'value': l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 
'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(n_name):asc_first, (o_year):desc_last]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'n_name': n_name, 'o_year': o_year, 'sum_value': sum_value}, orderings=[(n_name):asc_first, (o_year):desc_last]) + AGGREGATE(keys={'n_name': n_name, 'o_year': o_year}, aggregations={'sum_value': SUM(value)}) + PROJECT(columns={'n_name': n_name, 'o_year': YEAR(o_orderdate), 'value': l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 
'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index c436e164f..96e986806 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice_1 < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - 
PROJECT(columns={'o_totalprice_1': o_totalprice}) + FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[NULL_1], partition=[], order=[]), columns={}) + PROJECT(columns={'NULL_1': None:unknown, 'o_totalprice': o_totalprice}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsize.txt b/tests/test_plan_refsols/window_sliding_frame_relsize.txt index a367c4443..c85511eca 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsize.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsize.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w2': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w3': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w6': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, 
(sbTxId):asc_last], frame=(None, -1)), 'w7': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)), 'w8': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))}) + LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId_1, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) + PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId_1': sbTxId, 'w1': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w2': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w3': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w6': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w7': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)), 'w8': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))}) JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsum.txt b/tests/test_plan_refsols/window_sliding_frame_relsum.txt index 80af8f609..ac1149b97 100644 --- 
a/tests/test_plan_refsols/window_sliding_frame_relsum.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsum.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w2': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w3': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w6': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w7': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)), 'w8': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))}) + LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId_1, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) + PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId_1': sbTxId, 'w1': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w2': 
RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w3': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w6': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w7': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)), 'w8': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))}) JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/years_months_days_hours_datediff.txt b/tests/test_plan_refsols/years_months_days_hours_datediff.txt index afee87604..f7f1686d0 100644 --- a/tests/test_plan_refsols/years_months_days_hours_datediff.txt +++ b/tests/test_plan_refsols/years_months_days_hours_datediff.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('x', sbTxDateTime), ('y1', y1), ('years_diff', years_diff), ('c_years_diff', c_years_diff), ('c_y_diff', c_y_diff), ('y_diff', y_diff), ('months_diff', months_diff), ('c_months_diff', c_months_diff), ('mm_diff', mm_diff), ('days_diff', days_diff), ('c_days_diff', c_days_diff), ('c_d_diff', c_d_diff), ('d_diff', d_diff), ('hours_diff', hours_diff), ('c_hours_diff', c_hours_diff), ('c_h_diff', c_h_diff)], 
orderings=[(years_diff):asc_first]) - LIMIT(limit=Literal(value=30, type=NumericType()), columns={'c_d_diff': c_d_diff, 'c_days_diff': c_days_diff, 'c_h_diff': c_h_diff, 'c_hours_diff': c_hours_diff, 'c_months_diff': c_months_diff, 'c_y_diff': c_y_diff, 'c_years_diff': c_years_diff, 'd_diff': d_diff, 'days_diff': days_diff, 'hours_diff': hours_diff, 'mm_diff': mm_diff, 'months_diff': months_diff, 'sbTxDateTime': sbTxDateTime, 'y1': y1, 'y_diff': y_diff, 'years_diff': years_diff}, orderings=[(years_diff):asc_first]) - PROJECT(columns={'c_d_diff': DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_days_diff': DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_h_diff': DATEDIFF('H':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_hours_diff': DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_months_diff': DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_y_diff': DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_years_diff': DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'd_diff': DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'days_diff': DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'hours_diff': DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'mm_diff': DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'months_diff': DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'sbTxDateTime': sbTxDateTime, 'y1': datetime.datetime(2025, 5, 2, 11, 0):datetime, 'y_diff': DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'years_diff': DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 
0):datetime)}) +ROOT(columns=[('x', sbTxDateTime), ('y1', datetime.datetime(2025, 5, 2, 11, 0):datetime), ('years_diff', years_diff_1), ('c_years_diff', DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_y_diff', DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('y_diff', DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('months_diff', DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_months_diff', DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('mm_diff', DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('days_diff', DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_days_diff', DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_d_diff', DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('d_diff', DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('hours_diff', DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_hours_diff', DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_h_diff', DATEDIFF('H':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime))], orderings=[(years_diff_1):asc_first]) + LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'years_diff_1': years_diff}, orderings=[(years_diff):asc_first]) + PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'years_diff': DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)}) FILTER(condition=YEAR(sbTxDateTime) < 2025:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git 
a/tests/test_plan_refsols/yoy_change_in_num_orders.txt b/tests/test_plan_refsols/yoy_change_in_num_orders.txt index ea9eab88a..8a2b5f26c 100644 --- a/tests/test_plan_refsols/yoy_change_in_num_orders.txt +++ b/tests/test_plan_refsols/yoy_change_in_num_orders.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('year', year), ('current_year_orders', n_rows), ('pct_change', pct_change)], orderings=[(year):asc_first]) - PROJECT(columns={'n_rows': n_rows, 'pct_change': 100.0:numeric * n_rows - PREV(args=[n_rows], partition=[], order=[(year):asc_last]) / PREV(args=[n_rows], partition=[], order=[(year):asc_last]), 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) +ROOT(columns=[('year', year), ('current_year_orders', n_rows), ('pct_change', 100.0:numeric * n_rows - PREV(args=[n_rows], partition=[], order=[(year):asc_last]) / PREV(args=[n_rows], partition=[], order=[(year):asc_last]))], orderings=[(year):asc_first]) + AGGREGATE(keys={'year': year}, aggregations={'n_rows': COUNT()}) + PROJECT(columns={'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_sql_refsols/cumulative_stock_analysis_ansi.sql b/tests/test_sql_refsols/cumulative_stock_analysis_ansi.sql index a24c81c4d..a5952cee5 100644 --- a/tests/test_sql_refsols/cumulative_stock_analysis_ansi.sql +++ b/tests/test_sql_refsols/cumulative_stock_analysis_ansi.sql @@ -1,42 +1,32 @@ -WITH _t0 AS ( - SELECT - COUNT( - CASE WHEN sbtransaction.sbtxtype = 'buy' THEN sbtransaction.sbtxtype ELSE NULL END - ) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_buys_within_day, - ROUND( - ( - 100.0 * SUM(sbticker.sbtickersymbol IN ('AAPL', 'AMZN')) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW) - ) / COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS pct_apple_txns, - ROUND( - AVG(sbtransaction.sbtxamount) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS rolling_avg_amount, - SUM( - CASE - WHEN sbtransaction.sbtxtype = 'buy' - THEN sbtransaction.sbtxshares - ELSE 0 - sbtransaction.sbtxshares - END - ) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS share_change, - COUNT(*) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS txn_within_day, - sbtransaction.sbtxdatetime - FROM main.sbtransaction AS sbtransaction - JOIN main.sbticker AS sbticker - ON sbticker.sbtickerid = sbtransaction.sbtxtickerid - WHERE - EXTRACT(MONTH FROM CAST(sbtransaction.sbtxdatetime AS DATETIME)) = 4 - AND EXTRACT(YEAR FROM CAST(sbtransaction.sbtxdatetime AS DATETIME)) = 2023 - AND sbtransaction.sbtxstatus = 'success' -) SELECT - sbtxdatetime AS date_time, - txn_within_day, - n_buys_within_day, - pct_apple_txns, - share_change, - rolling_avg_amount -FROM _t0 + sbtransaction.sbtxdatetime AS date_time, + COUNT(*) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS txn_within_day, + COUNT( + CASE WHEN sbtransaction.sbtxtype = 'buy' THEN sbtransaction.sbtxtype ELSE NULL END + ) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_buys_within_day, + ROUND( + ( + 100.0 * SUM(sbticker.sbtickersymbol IN ('AAPL', 'AMZN')) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS 
LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + ) / COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS pct_apple_txns, + SUM( + CASE + WHEN sbtransaction.sbtxtype = 'buy' + THEN sbtransaction.sbtxshares + ELSE 0 - sbtransaction.sbtxshares + END + ) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS share_change, + ROUND( + AVG(sbtransaction.sbtxamount) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS rolling_avg_amount +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid +WHERE + EXTRACT(MONTH FROM CAST(sbtransaction.sbtxdatetime AS DATETIME)) = 4 + AND EXTRACT(YEAR FROM CAST(sbtransaction.sbtxdatetime AS DATETIME)) = 2023 + AND sbtransaction.sbtxstatus = 'success' ORDER BY - sbtxdatetime + sbtransaction.sbtxdatetime diff --git a/tests/test_sql_refsols/cumulative_stock_analysis_sqlite.sql b/tests/test_sql_refsols/cumulative_stock_analysis_sqlite.sql index 76948077b..4d63f1bc5 100644 --- a/tests/test_sql_refsols/cumulative_stock_analysis_sqlite.sql +++ b/tests/test_sql_refsols/cumulative_stock_analysis_sqlite.sql @@ -1,42 +1,32 @@ -WITH _t0 AS ( - SELECT - COUNT( - CASE WHEN sbtransaction.sbtxtype = 'buy' THEN sbtransaction.sbtxtype ELSE NULL END - ) OVER (PARTITION BY DATE(sbtransaction.sbtxdatetime, 'start of day') ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_buys_within_day, - ROUND( - CAST(( - 100.0 * SUM(sbticker.sbtickersymbol IN ('AAPL', 'AMZN')) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) - ) AS REAL) / COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS pct_apple_txns, - ROUND( - AVG(sbtransaction.sbtxamount) OVER (ORDER BY 
sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS rolling_avg_amount, - SUM( - IIF( - sbtransaction.sbtxtype = 'buy', - sbtransaction.sbtxshares, - 0 - sbtransaction.sbtxshares - ) - ) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS share_change, - COUNT(*) OVER (PARTITION BY DATE(sbtransaction.sbtxdatetime, 'start of day') ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS txn_within_day, - sbtransaction.sbtxdatetime - FROM main.sbtransaction AS sbtransaction - JOIN main.sbticker AS sbticker - ON sbticker.sbtickerid = sbtransaction.sbtxtickerid - WHERE - CAST(STRFTIME('%Y', sbtransaction.sbtxdatetime) AS INTEGER) = 2023 - AND CAST(STRFTIME('%m', sbtransaction.sbtxdatetime) AS INTEGER) = 4 - AND sbtransaction.sbtxstatus = 'success' -) SELECT - sbtxdatetime AS date_time, - txn_within_day, - n_buys_within_day, - pct_apple_txns, - share_change, - rolling_avg_amount -FROM _t0 + sbtransaction.sbtxdatetime AS date_time, + COUNT(*) OVER (PARTITION BY DATE(sbtransaction.sbtxdatetime, 'start of day') ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS txn_within_day, + COUNT( + CASE WHEN sbtransaction.sbtxtype = 'buy' THEN sbtransaction.sbtxtype ELSE NULL END + ) OVER (PARTITION BY DATE(sbtransaction.sbtxdatetime, 'start of day') ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_buys_within_day, + ROUND( + CAST(( + 100.0 * SUM(sbticker.sbtickersymbol IN ('AAPL', 'AMZN')) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + ) AS REAL) / COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS pct_apple_txns, + SUM( + IIF( + sbtransaction.sbtxtype = 'buy', + sbtransaction.sbtxshares, + 0 - sbtransaction.sbtxshares + ) + ) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW) AS share_change, + ROUND( + AVG(sbtransaction.sbtxamount) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS rolling_avg_amount +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid +WHERE + CAST(STRFTIME('%Y', sbtransaction.sbtxdatetime) AS INTEGER) = 2023 + AND CAST(STRFTIME('%m', sbtransaction.sbtxdatetime) AS INTEGER) = 4 + AND sbtransaction.sbtxstatus = 'success' ORDER BY - sbtxdatetime + sbtransaction.sbtxdatetime diff --git a/tests/test_sql_refsols/datediff_ansi.sql b/tests/test_sql_refsols/datediff_ansi.sql index 82f32d329..aa7347c30 100644 --- a/tests/test_sql_refsols/datediff_ansi.sql +++ b/tests/test_sql_refsols/datediff_ansi.sql @@ -1,16 +1,24 @@ +WITH _t0 AS ( + SELECT + DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff_1, + sbtxdatetime + FROM main.sbtransaction + WHERE + EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) < 2025 + ORDER BY + DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) + LIMIT 30 +) SELECT sbtxdatetime AS x, CAST('2025-05-02 11:00:00' AS TIMESTAMP) AS y1, CAST('2023-04-03 13:16:30' AS TIMESTAMP) AS y, - DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff, + years_diff_1 AS years_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), MONTH) AS months_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), DAY) AS days_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), HOUR) AS hours_diff, DATEDIFF(CAST('2023-04-03 13:16:30' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), MINUTE) AS minutes_diff, DATEDIFF(CAST('2023-04-03 13:16:30' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), SECOND) AS seconds_diff -FROM main.sbtransaction 
-WHERE - EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) < 2025 +FROM _t0 ORDER BY - years_diff -LIMIT 30 + years_diff_1 diff --git a/tests/test_sql_refsols/datediff_sqlite.sql b/tests/test_sql_refsols/datediff_sqlite.sql index bc6d68474..734f72ca9 100644 --- a/tests/test_sql_refsols/datediff_sqlite.sql +++ b/tests/test_sql_refsols/datediff_sqlite.sql @@ -1,8 +1,19 @@ +WITH _t0 AS ( + SELECT + CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff_1, + sbtxdatetime + FROM main.sbtransaction + WHERE + CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) < 2025 + ORDER BY + CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) + LIMIT 30 +) SELECT sbtxdatetime AS x, '2025-05-02 11:00:00' AS y1, '2023-04-03 13:16:30' AS y, - CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff, + years_diff_1 AS years_diff, ( CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) ) * 12 + CAST(STRFTIME('%m', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%m', sbtxdatetime) AS INTEGER) AS months_diff, @@ -24,9 +35,6 @@ SELECT ) AS INTEGER) * 24 + CAST(STRFTIME('%H', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%H', sbtxdatetime) AS INTEGER) ) * 60 + CAST(STRFTIME('%M', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%M', sbtxdatetime) AS INTEGER) ) * 60 + CAST(STRFTIME('%S', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%S', sbtxdatetime) AS INTEGER) AS seconds_diff -FROM main.sbtransaction -WHERE - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) < 2025 +FROM _t0 ORDER BY - years_diff -LIMIT 30 + years_diff_1 diff --git a/tests/test_sql_refsols/defog_broker_adv3_ansi.sql b/tests/test_sql_refsols/defog_broker_adv3_ansi.sql index ca84729c3..46087024c 100644 --- a/tests/test_sql_refsols/defog_broker_adv3_ansi.sql +++ 
b/tests/test_sql_refsols/defog_broker_adv3_ansi.sql @@ -18,4 +18,6 @@ LEFT JOIN _s1 AS _s1 WHERE NOT _s1.n_rows IS NULL AND _s1.n_rows >= 5 ORDER BY - success_rate + ( + 100.0 * COALESCE(_s1.sum_expr_2, 0) + ) / COALESCE(_s1.n_rows, 0) diff --git a/tests/test_sql_refsols/defog_broker_adv3_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv3_sqlite.sql index 1b5796b97..c30fd13b9 100644 --- a/tests/test_sql_refsols/defog_broker_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv3_sqlite.sql @@ -18,4 +18,6 @@ LEFT JOIN _s1 AS _s1 WHERE NOT _s1.n_rows IS NULL AND _s1.n_rows >= 5 ORDER BY - success_rate + CAST(( + 100.0 * COALESCE(_s1.sum_expr_2, 0) + ) AS REAL) / COALESCE(_s1.n_rows, 0) diff --git a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql index 479db90cc..ab77ecbff 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql @@ -32,9 +32,10 @@ WITH _s0 AS ( sbdptickerid ), _t0 AS ( SELECT - SUM(_s0.sum_sbdpclose) / SUM(_s0.count_sbdpclose) AS avg_close, MAX(_s0.max_high) AS max_high, MIN(_s0.min_low) AS min_low, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose, _s0.month, sbticker.sbtickersymbol FROM _s0 AS _s0 @@ -47,10 +48,12 @@ WITH _s0 AS ( SELECT sbtickersymbol AS symbol, month, - avg_close, + sum_sum_sbdpclose / sum_count_sbdpclose AS avg_close, max_high, min_low, ( - avg_close - LAG(avg_close, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) - ) / LAG(avg_close, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) AS momc + ( + sum_sum_sbdpclose / sum_count_sbdpclose + ) - LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) + ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) AS momc FROM _t0 diff --git 
a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql index f61260e72..e74219954 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql @@ -32,9 +32,10 @@ WITH _s0 AS ( sbdptickerid ), _t0 AS ( SELECT - CAST(SUM(_s0.sum_sbdpclose) AS REAL) / SUM(_s0.count_sbdpclose) AS avg_close, MAX(_s0.max_high) AS max_high, MIN(_s0.min_low) AS min_low, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose, _s0.month, sbticker.sbtickersymbol FROM _s0 AS _s0 @@ -47,10 +48,12 @@ WITH _s0 AS ( SELECT sbtickersymbol AS symbol, month, - avg_close, + CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose AS avg_close, max_high, min_low, CAST(( - avg_close - LAG(avg_close, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) - ) AS REAL) / LAG(avg_close, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc + ( + CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose + ) - LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) + ) AS REAL) / LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql index 26f6f11a1..bb8ea4609 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql @@ -6,14 +6,22 @@ WITH _s1 AS ( FROM main.sbtransaction GROUP BY sbtxtickerid +), _t0 AS ( + SELECT + sbticker.sbtickersymbol AS sbtickersymbol_1, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount_1, + _s1.n_rows + FROM main.sbticker AS sbticker + LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid + ORDER BY + COALESCE(_s1.sum_sbtxamount, 0) DESC + LIMIT 10 ) SELECT - sbticker.sbtickersymbol AS symbol, - 
COALESCE(_s1.n_rows, 0) AS num_transactions, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount -FROM main.sbticker AS sbticker -LEFT JOIN _s1 AS _s1 - ON _s1.sbtxtickerid = sbticker.sbtickerid + sbtickersymbol_1 AS symbol, + COALESCE(n_rows, 0) AS num_transactions, + total_amount_1 AS total_amount +FROM _t0 ORDER BY - total_amount DESC -LIMIT 10 + total_amount_1 DESC diff --git a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql index 26f6f11a1..bb8ea4609 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql @@ -6,14 +6,22 @@ WITH _s1 AS ( FROM main.sbtransaction GROUP BY sbtxtickerid +), _t0 AS ( + SELECT + sbticker.sbtickersymbol AS sbtickersymbol_1, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount_1, + _s1.n_rows + FROM main.sbticker AS sbticker + LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid + ORDER BY + COALESCE(_s1.sum_sbtxamount, 0) DESC + LIMIT 10 ) SELECT - sbticker.sbtickersymbol AS symbol, - COALESCE(_s1.n_rows, 0) AS num_transactions, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount -FROM main.sbticker AS sbticker -LEFT JOIN _s1 AS _s1 - ON _s1.sbtxtickerid = sbticker.sbtickerid + sbtickersymbol_1 AS symbol, + COALESCE(n_rows, 0) AS num_transactions, + total_amount_1 AS total_amount +FROM _t0 ORDER BY - total_amount DESC -LIMIT 10 + total_amount_1 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv5_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv5_ansi.sql index 098911866..182369122 100644 --- a/tests/test_sql_refsols/defog_dealership_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv5_ansi.sql @@ -6,23 +6,15 @@ WITH _s1 AS ( FROM main.sales GROUP BY salesperson_id -), _t0 AS ( - SELECT - RANK() OVER (ORDER BY COALESCE(_s1.sum_sale_price, 0) DESC NULLS FIRST) AS sales_rank, - COALESCE(_s1.sum_sale_price, 0) AS total_sales, - salespersons.first_name, - 
salespersons.last_name, - _s1.n_rows - FROM main.salespersons AS salespersons - JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id ) SELECT - first_name, - last_name, - total_sales, - n_rows AS num_sales, - sales_rank -FROM _t0 + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.sum_sale_price, 0) AS total_sales, + _s1.n_rows AS num_sales, + RANK() OVER (ORDER BY COALESCE(_s1.sum_sale_price, 0) DESC NULLS FIRST) AS sales_rank +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - total_sales DESC + COALESCE(_s1.sum_sale_price, 0) DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv5_sqlite.sql index 85d65af26..703043987 100644 --- a/tests/test_sql_refsols/defog_dealership_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv5_sqlite.sql @@ -6,23 +6,15 @@ WITH _s1 AS ( FROM main.sales GROUP BY salesperson_id -), _t0 AS ( - SELECT - RANK() OVER (ORDER BY COALESCE(_s1.sum_sale_price, 0) DESC) AS sales_rank, - COALESCE(_s1.sum_sale_price, 0) AS total_sales, - salespersons.first_name, - salespersons.last_name, - _s1.n_rows - FROM main.salespersons AS salespersons - JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id ) SELECT - first_name, - last_name, - total_sales, - n_rows AS num_sales, - sales_rank -FROM _t0 + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.sum_sale_price, 0) AS total_sales, + _s1.n_rows AS num_sales, + RANK() OVER (ORDER BY COALESCE(_s1.sum_sale_price, 0) DESC) AS sales_rank +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - total_sales DESC + COALESCE(_s1.sum_sale_price, 0) DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql index b8a903eb5..7a80c3684 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql +++ 
b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql @@ -8,15 +8,24 @@ WITH _s1 AS ( sale_date >= DATE_ADD(CURRENT_TIMESTAMP(), -3, 'MONTH') GROUP BY salesperson_id +), _t0 AS ( + SELECT + salespersons.first_name AS first_name_1, + salespersons.last_name AS last_name_1, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, + _s1.n_rows + FROM main.salespersons AS salespersons + LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id + ORDER BY + COALESCE(_s1.sum_sale_price, 0) DESC + LIMIT 3 ) SELECT - salespersons.first_name, - salespersons.last_name, - COALESCE(_s1.n_rows, 0) AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.salespersons AS salespersons -LEFT JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id + first_name_1 AS first_name, + last_name_1 AS last_name, + COALESCE(n_rows, 0) AS total_sales, + total_revenue_1 AS total_revenue +FROM _t0 ORDER BY - total_revenue DESC -LIMIT 3 + total_revenue_1 DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql index c28f59e7e..103007c2c 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql @@ -8,15 +8,24 @@ WITH _s1 AS ( sale_date >= DATETIME('now', '-3 month') GROUP BY salesperson_id +), _t0 AS ( + SELECT + salespersons.first_name AS first_name_1, + salespersons.last_name AS last_name_1, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, + _s1.n_rows + FROM main.salespersons AS salespersons + LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id + ORDER BY + COALESCE(_s1.sum_sale_price, 0) DESC + LIMIT 3 ) SELECT - salespersons.first_name, - salespersons.last_name, - COALESCE(_s1.n_rows, 0) AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.salespersons AS salespersons -LEFT JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id + first_name_1 AS first_name, + 
last_name_1 AS last_name, + COALESCE(n_rows, 0) AS total_sales, + total_revenue_1 AS total_revenue +FROM _t0 ORDER BY - total_revenue DESC -LIMIT 3 + total_revenue_1 DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql index 9a9471dea..16ac04eee 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql @@ -8,15 +8,24 @@ WITH _s1 AS ( DATEDIFF(CURRENT_TIMESTAMP(), CAST(sale_date AS DATETIME), DAY) <= 30 GROUP BY salesperson_id +), _t0 AS ( + SELECT + salespersons.first_name AS first_name_1, + salespersons.last_name AS last_name_1, + _s1.n_rows_1 AS n_rows, + _s1.sum_sale_price + FROM main.salespersons AS salespersons + JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id + ORDER BY + n_rows DESC + LIMIT 5 ) SELECT - salespersons.first_name, - salespersons.last_name, - _s1.n_rows_1 AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.salespersons AS salespersons -JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id + first_name_1 AS first_name, + last_name_1 AS last_name, + n_rows AS total_sales, + COALESCE(sum_sale_price, 0) AS total_revenue +FROM _t0 ORDER BY - _s1.n_rows_1 DESC -LIMIT 5 + n_rows DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql index 9f797c2bc..b06f6bb94 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql @@ -10,15 +10,24 @@ WITH _s1 AS ( ) AS INTEGER) <= 30 GROUP BY salesperson_id +), _t0 AS ( + SELECT + salespersons.first_name AS first_name_1, + salespersons.last_name AS last_name_1, + _s1.n_rows_1 AS n_rows, + _s1.sum_sale_price + FROM main.salespersons AS salespersons + JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id + ORDER BY + n_rows DESC + LIMIT 5 ) SELECT - 
salespersons.first_name, - salespersons.last_name, - _s1.n_rows_1 AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.salespersons AS salespersons -JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id + first_name_1 AS first_name, + last_name_1 AS last_name, + n_rows AS total_sales, + COALESCE(sum_sale_price, 0) AS total_revenue +FROM _t0 ORDER BY - _s1.n_rows_1 DESC -LIMIT 5 + n_rows DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql index 3ea00bc25..3ef87cc68 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql @@ -6,15 +6,24 @@ WITH _s1 AS ( FROM main.sales GROUP BY car_id +), _t0 AS ( + SELECT + cars.make AS make_1, + cars.model AS model_1, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, + _s1.n_rows + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + ORDER BY + COALESCE(_s1.sum_sale_price, 0) DESC + LIMIT 5 ) SELECT - cars.make, - cars.model, - COALESCE(_s1.n_rows, 0) AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id + make_1 AS make, + model_1 AS model, + COALESCE(n_rows, 0) AS total_sales, + total_revenue_1 AS total_revenue +FROM _t0 ORDER BY - total_revenue DESC -LIMIT 5 + total_revenue_1 DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql index 3ea00bc25..3ef87cc68 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql @@ -6,15 +6,24 @@ WITH _s1 AS ( FROM main.sales GROUP BY car_id +), _t0 AS ( + SELECT + cars.make AS make_1, + cars.model AS model_1, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, + _s1.n_rows + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id 
+ ORDER BY + COALESCE(_s1.sum_sale_price, 0) DESC + LIMIT 5 ) SELECT - cars.make, - cars.model, - COALESCE(_s1.n_rows, 0) AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id + make_1 AS make, + model_1 AS model, + COALESCE(n_rows, 0) AS total_sales, + total_revenue_1 AS total_revenue +FROM _t0 ORDER BY - total_revenue DESC -LIMIT 5 + total_revenue_1 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql index af2c3f84f..d9bb1d546 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql @@ -17,4 +17,4 @@ FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid ORDER BY - total_duration DESC + COALESCE(_s1.sum_duration, 0) DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql index 93fddddfd..c8a248291 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql @@ -23,4 +23,4 @@ FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid ORDER BY - total_duration DESC + COALESCE(_s1.sum_duration, 0) DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 747d8b6d3..4e4bdb420 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -9,14 +9,22 @@ WITH _s1 AS ( AND receiver_type = 1 GROUP BY receiver_id +), _t0 AS ( + SELECT + merchants.name AS name_1, + COALESCE(_s1.sum_amount, 0) AS total_amount_1, + _s1.n_rows + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid + ORDER BY + COALESCE(_s1.sum_amount, 0) DESC + LIMIT 2 ) SELECT - merchants.name AS merchant_name, - COALESCE(_s1.n_rows, 0) AS 
total_transactions, - COALESCE(_s1.sum_amount, 0) AS total_amount -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid + name_1 AS merchant_name, + COALESCE(n_rows, 0) AS total_transactions, + total_amount_1 AS total_amount +FROM _t0 ORDER BY - total_amount DESC -LIMIT 2 + total_amount_1 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index 73a50f555..ed3cfe9ea 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -9,14 +9,22 @@ WITH _s1 AS ( AND receiver_type = 1 GROUP BY receiver_id +), _t0 AS ( + SELECT + merchants.name AS name_1, + COALESCE(_s1.sum_amount, 0) AS total_amount_1, + _s1.n_rows + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid + ORDER BY + COALESCE(_s1.sum_amount, 0) DESC + LIMIT 2 ) SELECT - merchants.name AS merchant_name, - COALESCE(_s1.n_rows, 0) AS total_transactions, - COALESCE(_s1.sum_amount, 0) AS total_amount -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid + name_1 AS merchant_name, + COALESCE(n_rows, 0) AS total_transactions, + total_amount_1 AS total_amount +FROM _t0 ORDER BY - total_amount DESC -LIMIT 2 + total_amount_1 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index c605cd315..672017439 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -6,14 +6,22 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily GROUP BY coupon_id +), _t0 AS ( + SELECT + coupons.code AS code_1, + COALESCE(_s1.count_txid, 0) AS redemption_count_1, + _s1.sum_amount + FROM main.coupons AS coupons + LEFT JOIN _s1 AS _s1 + ON _s1.coupon_id = coupons.cid + ORDER BY + COALESCE(_s1.count_txid, 0) DESC + LIMIT 3 ) SELECT - 
coupons.code AS coupon_code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - COALESCE(_s1.sum_amount, 0) AS total_discount -FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid + code_1 AS coupon_code, + redemption_count_1 AS redemption_count, + COALESCE(sum_amount, 0) AS total_discount +FROM _t0 ORDER BY - redemption_count DESC -LIMIT 3 + redemption_count_1 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index c605cd315..672017439 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -6,14 +6,22 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily GROUP BY coupon_id +), _t0 AS ( + SELECT + coupons.code AS code_1, + COALESCE(_s1.count_txid, 0) AS redemption_count_1, + _s1.sum_amount + FROM main.coupons AS coupons + LEFT JOIN _s1 AS _s1 + ON _s1.coupon_id = coupons.cid + ORDER BY + COALESCE(_s1.count_txid, 0) DESC + LIMIT 3 ) SELECT - coupons.code AS coupon_code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - COALESCE(_s1.sum_amount, 0) AS total_discount -FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid + code_1 AS coupon_code, + redemption_count_1 AS redemption_count, + COALESCE(sum_amount, 0) AS total_discount +FROM _t0 ORDER BY - redemption_count DESC -LIMIT 3 + redemption_count_1 DESC diff --git a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql index f9e924c2b..abf9e60c6 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql +++ b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql @@ -3,30 +3,40 @@ WITH _s2 AS ( ev_dt, ev_key FROM events +), _t0 AS ( + SELECT + eras.er_name AS er_name_1, + events.ev_name AS ev_name_1, + seasons.s_name AS s_name_1, + times.t_name AS t_name_1, + events.ev_dt + FROM events AS events + JOIN eras AS eras + ON 
eras.er_end_year > EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) + AND eras.er_start_year <= EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) + JOIN _s2 AS _s2 + ON _s2.ev_key = events.ev_key + JOIN seasons AS seasons + ON seasons.s_month1 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) + OR seasons.s_month2 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) + OR seasons.s_month3 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) + JOIN _s2 AS _s6 + ON _s6.ev_key = events.ev_key + JOIN times AS times + ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) + AND times.t_start_hour <= EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) + WHERE + events.ev_typ = 'culture' + ORDER BY + ev_dt + LIMIT 6 ) SELECT - events.ev_name AS event_name, - eras.er_name AS era_name, - EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) AS event_year, - seasons.s_name AS season_name, - times.t_name AS tod -FROM events AS events -JOIN eras AS eras - ON eras.er_end_year > EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) - AND eras.er_start_year <= EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) -JOIN _s2 AS _s2 - ON _s2.ev_key = events.ev_key -JOIN seasons AS seasons - ON seasons.s_month1 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) - OR seasons.s_month2 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) - OR seasons.s_month3 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) -JOIN _s2 AS _s6 - ON _s6.ev_key = events.ev_key -JOIN times AS times - ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) - AND times.t_start_hour <= EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) -WHERE - events.ev_typ = 'culture' + ev_name_1 AS event_name, + er_name_1 AS era_name, + EXTRACT(YEAR FROM CAST(ev_dt AS DATETIME)) AS event_year, + s_name_1 AS season_name, + t_name_1 AS tod +FROM _t0 ORDER BY - events.ev_dt -LIMIT 6 + ev_dt diff --git a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql index 
25f7ff28f..972bff76a 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql +++ b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql @@ -3,30 +3,40 @@ WITH _s2 AS ( ev_dt, ev_key FROM events +), _t0 AS ( + SELECT + eras.er_name AS er_name_1, + events.ev_name AS ev_name_1, + seasons.s_name AS s_name_1, + times.t_name AS t_name_1, + events.ev_dt + FROM events AS events + JOIN eras AS eras + ON eras.er_end_year > CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) + AND eras.er_start_year <= CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) + JOIN _s2 AS _s2 + ON _s2.ev_key = events.ev_key + JOIN seasons AS seasons + ON seasons.s_month1 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) + OR seasons.s_month2 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) + OR seasons.s_month3 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) + JOIN _s2 AS _s6 + ON _s6.ev_key = events.ev_key + JOIN times AS times + ON times.t_end_hour > CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) + AND times.t_start_hour <= CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) + WHERE + events.ev_typ = 'culture' + ORDER BY + ev_dt + LIMIT 6 ) SELECT - events.ev_name AS event_name, - eras.er_name AS era_name, - CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) AS event_year, - seasons.s_name AS season_name, - times.t_name AS tod -FROM events AS events -JOIN eras AS eras - ON eras.er_end_year > CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) - AND eras.er_start_year <= CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) -JOIN _s2 AS _s2 - ON _s2.ev_key = events.ev_key -JOIN seasons AS seasons - ON seasons.s_month1 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) - OR seasons.s_month2 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) - OR seasons.s_month3 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) -JOIN _s2 AS _s6 - ON _s6.ev_key = events.ev_key -JOIN times AS times - ON times.t_end_hour > CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) - AND times.t_start_hour <= CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) -WHERE - events.ev_typ = 
'culture' + ev_name_1 AS event_name, + er_name_1 AS era_name, + CAST(STRFTIME('%Y', ev_dt) AS INTEGER) AS event_year, + s_name_1 AS season_name, + t_name_1 AS tod +FROM _t0 ORDER BY - events.ev_dt -LIMIT 6 + ev_dt diff --git a/tests/test_sql_refsols/epoch_pct_searches_per_tod_ansi.sql b/tests/test_sql_refsols/epoch_pct_searches_per_tod_ansi.sql index 4c8eab137..84cd80284 100644 --- a/tests/test_sql_refsols/epoch_pct_searches_per_tod_ansi.sql +++ b/tests/test_sql_refsols/epoch_pct_searches_per_tod_ansi.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t0 AS ( SELECT ANY_VALUE(times.t_name) AS anything_t_name, ANY_VALUE(times.t_start_hour) AS anything_t_start_hour, @@ -9,18 +9,12 @@ WITH _t1 AS ( AND times.t_start_hour <= EXTRACT(HOUR FROM CAST(searches.search_ts AS DATETIME)) GROUP BY times.t_name -), _t0 AS ( - SELECT - ROUND(( - 100.0 * n_rows - ) / SUM(n_rows) OVER (), 2) AS pct_searches, - anything_t_name, - anything_t_start_hour - FROM _t1 ) SELECT anything_t_name AS tod, - pct_searches + ROUND(( + 100.0 * n_rows + ) / SUM(n_rows) OVER (), 2) AS pct_searches FROM _t0 ORDER BY anything_t_start_hour diff --git a/tests/test_sql_refsols/epoch_pct_searches_per_tod_sqlite.sql b/tests/test_sql_refsols/epoch_pct_searches_per_tod_sqlite.sql index 2ba2f91ca..d3e7dd78f 100644 --- a/tests/test_sql_refsols/epoch_pct_searches_per_tod_sqlite.sql +++ b/tests/test_sql_refsols/epoch_pct_searches_per_tod_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t0 AS ( SELECT MAX(times.t_name) AS anything_t_name, MAX(times.t_start_hour) AS anything_t_start_hour, @@ -9,18 +9,12 @@ WITH _t1 AS ( AND times.t_start_hour <= CAST(STRFTIME('%H', searches.search_ts) AS INTEGER) GROUP BY times.t_name -), _t0 AS ( - SELECT - ROUND(CAST(( - 100.0 * n_rows - ) AS REAL) / SUM(n_rows) OVER (), 2) AS pct_searches, - anything_t_name, - anything_t_start_hour - FROM _t1 ) SELECT anything_t_name AS tod, - pct_searches + ROUND(CAST(( + 100.0 * n_rows + ) AS REAL) / SUM(n_rows) OVER (), 2) AS pct_searches FROM _t0 
ORDER BY anything_t_start_hour diff --git a/tests/test_sql_refsols/epoch_search_results_by_tod_ansi.sql b/tests/test_sql_refsols/epoch_search_results_by_tod_ansi.sql index 61ae5d2ab..c7e546d2a 100644 --- a/tests/test_sql_refsols/epoch_search_results_by_tod_ansi.sql +++ b/tests/test_sql_refsols/epoch_search_results_by_tod_ansi.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t0 AS ( SELECT ANY_VALUE(times.t_name) AS anything_t_name, ANY_VALUE(times.t_start_hour) AS anything_t_start_hour, @@ -10,20 +10,13 @@ WITH _t1 AS ( AND times.t_start_hour <= EXTRACT(HOUR FROM CAST(searches.search_ts AS DATETIME)) GROUP BY times.t_name -), _t0 AS ( - SELECT - ROUND(avg_search_num_results, 2) AS avg_results, - ROUND(( - 100.0 * n_rows - ) / SUM(n_rows) OVER (), 2) AS pct_searches, - anything_t_name, - anything_t_start_hour - FROM _t1 ) SELECT anything_t_name AS tod, - pct_searches, - avg_results + ROUND(( + 100.0 * n_rows + ) / SUM(n_rows) OVER (), 2) AS pct_searches, + ROUND(avg_search_num_results, 2) AS avg_results FROM _t0 ORDER BY anything_t_start_hour diff --git a/tests/test_sql_refsols/epoch_search_results_by_tod_sqlite.sql b/tests/test_sql_refsols/epoch_search_results_by_tod_sqlite.sql index 9cb23ba2b..1c7a5d778 100644 --- a/tests/test_sql_refsols/epoch_search_results_by_tod_sqlite.sql +++ b/tests/test_sql_refsols/epoch_search_results_by_tod_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t0 AS ( SELECT MAX(times.t_name) AS anything_t_name, MAX(times.t_start_hour) AS anything_t_start_hour, @@ -10,20 +10,13 @@ WITH _t1 AS ( AND times.t_start_hour <= CAST(STRFTIME('%H', searches.search_ts) AS INTEGER) GROUP BY times.t_name -), _t0 AS ( - SELECT - ROUND(avg_search_num_results, 2) AS avg_results, - ROUND(CAST(( - 100.0 * n_rows - ) AS REAL) / SUM(n_rows) OVER (), 2) AS pct_searches, - anything_t_name, - anything_t_start_hour - FROM _t1 ) SELECT anything_t_name AS tod, - pct_searches, - avg_results + ROUND(CAST(( + 100.0 * n_rows + ) AS REAL) / SUM(n_rows) OVER (), 2) AS 
pct_searches, + ROUND(avg_search_num_results, 2) AS avg_results FROM _t0 ORDER BY anything_t_start_hour diff --git a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql index f04f7a41b..2d82d40ff 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql @@ -1,9 +1,19 @@ +WITH _t0 AS ( + SELECT + CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost_1, + ps_availqty, + ps_partkey, + ps_suppkey + FROM tpch.partsupp + ORDER BY + CEIL(ps_supplycost * FLOOR(ps_availqty)) DESC + LIMIT 10 +) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, FLOOR(ps_availqty) AS complete_parts, - CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost -FROM tpch.partsupp + total_cost_1 AS total_cost +FROM _t0 ORDER BY - total_cost DESC -LIMIT 10 + total_cost_1 DESC diff --git a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql index c72007f73..251ef1ebf 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql @@ -1,19 +1,39 @@ +WITH _t0 AS ( + SELECT + CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) + CASE + WHEN CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) < ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) + THEN 1 + ELSE 0 + END AS total_cost_1, + ps_availqty, + ps_partkey, + ps_suppkey + FROM tpch.partsupp + ORDER BY + CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) + CASE + WHEN CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 
0 END + ) AS INTEGER) < ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) + THEN 1 + ELSE 0 + END DESC + LIMIT 10 +) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END AS complete_parts, - CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) + CASE - WHEN CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) < ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) - THEN 1 - ELSE 0 - END AS total_cost -FROM tpch.partsupp + total_cost_1 AS total_cost +FROM _t0 ORDER BY - total_cost DESC -LIMIT 10 + total_cost_1 DESC diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql index 45f15dc10..d900cffd0 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t2 AS ( SELECT in_device_id FROM main.incidents @@ -6,7 +6,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s3 AS ( @@ -23,7 +23,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s7 AS ( @@ -40,7 +40,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s13 AS ( diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql index 93c5d04ef..9d1838628 100644 --- 
a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t2 AS ( SELECT in_device_id FROM main.incidents @@ -6,7 +6,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s3 AS ( @@ -23,7 +23,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s7 AS ( @@ -40,7 +40,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s13 AS ( diff --git a/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_ansi.sql b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_ansi.sql index 4d76d5bdb..f96313a8a 100644 --- a/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_ansi.sql @@ -9,19 +9,16 @@ WITH _s5 AS ( ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' GROUP BY incidents.in_error_id -), _t0 AS ( - SELECT - ROUND(( - 100.0 * COALESCE(_s5.n_rows, 0) - ) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), 2) AS pct, - errors.er_name - FROM main.errors AS errors - LEFT JOIN _s5 AS _s5 - ON _s5.in_error_id = errors.er_id ) SELECT - er_name AS error, - pct -FROM _t0 + errors.er_name AS error, + ROUND(( + 100.0 * COALESCE(_s5.n_rows, 0) + ) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), 2) AS pct +FROM main.errors AS errors +LEFT JOIN _s5 AS _s5 + ON _s5.in_error_id = errors.er_id ORDER BY - pct DESC + ROUND(( + 100.0 * COALESCE(_s5.n_rows, 0) + ) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), 2) DESC diff --git a/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_sqlite.sql b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_sqlite.sql index 410c09ec0..cc033e6bb 100644 --- 
a/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_sqlite.sql @@ -9,22 +9,22 @@ WITH _s5 AS ( ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' GROUP BY incidents.in_error_id -), _t0 AS ( - SELECT - ROUND( - CAST(( - 100.0 * COALESCE(_s5.n_rows, 0) - ) AS REAL) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), - 2 - ) AS pct, - errors.er_name - FROM main.errors AS errors - LEFT JOIN _s5 AS _s5 - ON _s5.in_error_id = errors.er_id ) SELECT - er_name AS error, - pct -FROM _t0 + errors.er_name AS error, + ROUND( + CAST(( + 100.0 * COALESCE(_s5.n_rows, 0) + ) AS REAL) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), + 2 + ) AS pct +FROM main.errors AS errors +LEFT JOIN _s5 AS _s5 + ON _s5.in_error_id = errors.er_id ORDER BY - pct DESC + ROUND( + CAST(( + 100.0 * COALESCE(_s5.n_rows, 0) + ) AS REAL) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), + 2 + ) DESC diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index 211f66449..3c42321e0 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -1,10 +1,10 @@ -WITH _t5 AS ( +WITH _t3 AS ( SELECT ca_dt FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 2021) -), _t8 AS ( +), _t7 AS ( SELECT co_id, co_name @@ -14,26 +14,26 @@ WITH _t5 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t7.ca_dt - FROM _t5 AS _t7 + _t6.ca_dt + FROM _t3 AS _t6 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATE_ADD(CAST(_t7.ca_dt AS TIMESTAMP), -6, 'MONTH') + ON calendar.ca_dt >= DATE_ADD(CAST(_t6.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t8 AS _t8 - ON _t8.co_id = devices.de_production_country_id + JOIN _t7 AS _t7 
+ ON _t7.co_id = devices.de_production_country_id GROUP BY - _t7.ca_dt + _t6.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, _t10.ca_dt - FROM _t5 AS _t10 + FROM _t3 AS _t10 JOIN main.incidents AS incidents ON _t10.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t11 + JOIN _t7 AS _t11 ON _t11.co_id = devices.de_production_country_id GROUP BY _t10.ca_dt @@ -41,11 +41,11 @@ WITH _t5 AS ( SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME))), ( + WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))), ( 2 * -1 )) END @@ -53,13 +53,13 @@ SELECT ROUND(( 1000000.0 * COALESCE(SUM(_s15.n_rows), 0) ) / COALESCE(SUM(_s7.n_rows), 0), 2) AS ir -FROM _t5 AS _t5 +FROM _t3 AS _t3 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t5.ca_dt + ON _s7.ca_dt = _t3.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t5.ca_dt + ON _s15.ca_dt = _t3.ca_dt GROUP BY - EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME)), - EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)) + EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index e896b36ac..1c08c7d2e 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -1,10 +1,10 @@ -WITH _t5 AS ( +WITH _t3 AS ( 
SELECT ca_dt FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t8 AS ( +), _t7 AS ( SELECT co_id, co_name @@ -14,26 +14,26 @@ WITH _t5 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t7.ca_dt - FROM _t5 AS _t7 + _t6.ca_dt + FROM _t3 AS _t6 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATETIME(_t7.ca_dt, '-6 month') + ON calendar.ca_dt >= DATETIME(_t6.ca_dt, '-6 month') JOIN main.devices AS devices ON calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t8 AS _t8 - ON _t8.co_id = devices.de_production_country_id + JOIN _t7 AS _t7 + ON _t7.co_id = devices.de_production_country_id GROUP BY - _t7.ca_dt + _t6.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, _t10.ca_dt - FROM _t5 AS _t10 + FROM _t3 AS _t10 JOIN main.incidents AS incidents ON _t10.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t11 + JOIN _t7 AS _t11 ON _t11.co_id = devices.de_production_country_id GROUP BY _t10.ca_dt @@ -41,11 +41,11 @@ WITH _t5 AS ( SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), ( 2 * -1 )) END @@ -56,13 +56,13 @@ SELECT ) AS REAL) / COALESCE(SUM(_s7.n_rows), 0), 2 ) AS ir -FROM _t5 AS _t5 +FROM _t3 AS _t3 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t5.ca_dt + ON _s7.ca_dt = _t3.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t5.ca_dt + ON _s15.ca_dt = _t3.ca_dt GROUP BY - CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), - CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER) + 
CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql index 292b7386e..d56b6a7e2 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t9 AS ( +), _t7 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t9 AS _t9 - ON _t9.pr_id = devices.de_product_id + JOIN _t7 AS _t7 + ON _t7.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t9 AS _t11 - ON _t11.pr_id = devices.de_product_id + JOIN _t7 AS _t9 + ON _t9.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( @@ -51,42 +51,33 @@ WITH _s14 AS ( ON _s13.ca_dt = _s6.ca_dt GROUP BY EXTRACT(YEAR FROM CAST(_s6.ca_dt AS DATETIME)) -), _t0 AS ( - SELECT - ROUND( - SUM(COALESCE(_s15.sum_expr_4, 0)) OVER (ORDER BY _s15.year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / SUM(COALESCE(_s15.sum_n_rows, 0)) OVER (ORDER BY _s15.year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS cum_ir, - ROUND( - ( - 100.0 * ( - COALESCE(_s15.sum_n_rows, 0) - LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year NULLS LAST) - ) - ) / LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year NULLS LAST), - 2 - ) AS pct_bought_change, - ROUND( - ( - 100.0 * ( - COALESCE(_s15.sum_expr_4, 0) - 
LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year NULLS LAST) - ) - ) / LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year NULLS LAST), - 2 - ) AS pct_incident_change, - _s15.year - EXTRACT(YEAR FROM CAST(_s14.release_date AS DATETIME)) AS years_since_release, - COALESCE(_s15.sum_n_rows, 0) AS n_devices, - COALESCE(_s15.sum_expr_4, 0) AS n_incidents - FROM _s14 AS _s14 - JOIN _s15 AS _s15 - ON _s15.year >= EXTRACT(YEAR FROM CAST(_s14.release_date AS DATETIME)) ) SELECT - years_since_release, - cum_ir, - pct_bought_change, - pct_incident_change, - n_devices AS bought, - n_incidents AS incidents -FROM _t0 + _s15.year - EXTRACT(YEAR FROM CAST(_s14.release_date AS DATETIME)) AS years_since_release, + ROUND( + SUM(COALESCE(_s15.sum_expr_4, 0)) OVER (ORDER BY _s15.year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / SUM(COALESCE(_s15.sum_n_rows, 0)) OVER (ORDER BY _s15.year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + ( + 100.0 * ( + COALESCE(_s15.sum_n_rows, 0) - LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year NULLS LAST) + ) + ) / LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year NULLS LAST), + 2 + ) AS pct_bought_change, + ROUND( + ( + 100.0 * ( + COALESCE(_s15.sum_expr_4, 0) - LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year NULLS LAST) + ) + ) / LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year NULLS LAST), + 2 + ) AS pct_incident_change, + COALESCE(_s15.sum_n_rows, 0) AS bought, + COALESCE(_s15.sum_expr_4, 0) AS incidents +FROM _s14 AS _s14 +JOIN _s15 AS _s15 + ON _s15.year >= EXTRACT(YEAR FROM CAST(_s14.release_date AS DATETIME)) ORDER BY - years_since_release + _s15.year - EXTRACT(YEAR FROM CAST(_s14.release_date AS DATETIME)) diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql index 
4becabea9..40f2461ce 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t9 AS ( +), _t7 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t9 AS _t9 - ON _t9.pr_id = devices.de_product_id + JOIN _t7 AS _t7 + ON _t7.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t9 AS _t11 - ON _t11.pr_id = devices.de_product_id + JOIN _t7 AS _t9 + ON _t9.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( @@ -51,42 +51,33 @@ WITH _s14 AS ( ON _s13.ca_dt = _s6.ca_dt GROUP BY CAST(STRFTIME('%Y', _s6.ca_dt) AS INTEGER) -), _t0 AS ( - SELECT - ROUND( - CAST(SUM(COALESCE(_s15.sum_expr_4, 0)) OVER (ORDER BY _s15.year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS REAL) / SUM(COALESCE(_s15.sum_n_rows, 0)) OVER (ORDER BY _s15.year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS cum_ir, - ROUND( - CAST(( - 100.0 * ( - COALESCE(_s15.sum_n_rows, 0) - LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year) - ) - ) AS REAL) / LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year), - 2 - ) AS pct_bought_change, - ROUND( - CAST(( - 100.0 * ( - COALESCE(_s15.sum_expr_4, 0) - LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year) - ) - ) AS REAL) / LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year), - 2 - ) AS pct_incident_change, - _s15.year - CAST(STRFTIME('%Y', _s14.release_date) AS INTEGER) AS years_since_release, - COALESCE(_s15.sum_n_rows, 0) AS n_devices, - COALESCE(_s15.sum_expr_4, 0) AS n_incidents - 
FROM _s14 AS _s14 - JOIN _s15 AS _s15 - ON _s15.year >= CAST(STRFTIME('%Y', _s14.release_date) AS INTEGER) ) SELECT - years_since_release, - cum_ir, - pct_bought_change, - pct_incident_change, - n_devices AS bought, - n_incidents AS incidents -FROM _t0 + _s15.year - CAST(STRFTIME('%Y', _s14.release_date) AS INTEGER) AS years_since_release, + ROUND( + CAST(SUM(COALESCE(_s15.sum_expr_4, 0)) OVER (ORDER BY _s15.year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS REAL) / SUM(COALESCE(_s15.sum_n_rows, 0)) OVER (ORDER BY _s15.year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + CAST(( + 100.0 * ( + COALESCE(_s15.sum_n_rows, 0) - LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year) + ) + ) AS REAL) / LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year), + 2 + ) AS pct_bought_change, + ROUND( + CAST(( + 100.0 * ( + COALESCE(_s15.sum_expr_4, 0) - LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year) + ) + ) AS REAL) / LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year), + 2 + ) AS pct_incident_change, + COALESCE(_s15.sum_n_rows, 0) AS bought, + COALESCE(_s15.sum_expr_4, 0) AS incidents +FROM _s14 AS _s14 +JOIN _s15 AS _s15 + ON _s15.year >= CAST(STRFTIME('%Y', _s14.release_date) AS INTEGER) ORDER BY - years_since_release + _s15.year - CAST(STRFTIME('%Y', _s14.release_date) AS INTEGER) diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql index 337d437df..1f40c54cb 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql @@ -20,7 +20,7 @@ WITH _s2 AS ( ON _s4.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) GROUP BY _s4.ca_dt -), _t4 AS ( +), _t2 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, @@ 
-32,42 +32,33 @@ WITH _s2 AS ( ON _s2.ca_dt = _s7.ca_dt GROUP BY EXTRACT(YEAR FROM CAST(_s2.ca_dt AS DATETIME)) -), _t0 AS ( - SELECT - ROUND( - SUM(COALESCE(sum_n_rows, 0)) OVER (ORDER BY year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / SUM(COALESCE(sum_expr_3, 0)) OVER (ORDER BY year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS cum_ir, - ROUND( - ( - 100.0 * ( - COALESCE(sum_expr_3, 0) - LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year NULLS LAST) - ) - ) / LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year NULLS LAST), - 2 - ) AS pct_bought_change, - ROUND( - ( - 100.0 * ( - COALESCE(sum_n_rows, 0) - LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year NULLS LAST) - ) - ) / LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year NULLS LAST), - 2 - ) AS pct_incident_change, - COALESCE(sum_expr_3, 0) AS n_devices, - COALESCE(sum_n_rows, 0) AS n_incidents, - year - FROM _t4 - WHERE - NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ) SELECT year AS yr, - cum_ir, - pct_bought_change, - pct_incident_change, - n_devices AS bought, - n_incidents AS incidents -FROM _t0 + ROUND( + SUM(COALESCE(sum_n_rows, 0)) OVER (ORDER BY year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / SUM(COALESCE(sum_expr_3, 0)) OVER (ORDER BY year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + ( + 100.0 * ( + COALESCE(sum_expr_3, 0) - LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year NULLS LAST) + ) + ) / LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year NULLS LAST), + 2 + ) AS pct_bought_change, + ROUND( + ( + 100.0 * ( + COALESCE(sum_n_rows, 0) - LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year NULLS LAST) + ) + ) / LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year NULLS LAST), + 2 + ) AS pct_incident_change, + COALESCE(sum_expr_3, 0) AS bought, + COALESCE(sum_n_rows, 0) AS incidents +FROM _t2 +WHERE + NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ORDER BY year diff --git 
a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql index 61f47822c..53b91a35c 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql @@ -20,7 +20,7 @@ WITH _s2 AS ( ON _s4.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') GROUP BY _s4.ca_dt -), _t4 AS ( +), _t2 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, @@ -32,42 +32,33 @@ WITH _s2 AS ( ON _s2.ca_dt = _s7.ca_dt GROUP BY CAST(STRFTIME('%Y', _s2.ca_dt) AS INTEGER) -), _t0 AS ( - SELECT - ROUND( - CAST(SUM(COALESCE(sum_n_rows, 0)) OVER (ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS REAL) / SUM(COALESCE(sum_expr_3, 0)) OVER (ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS cum_ir, - ROUND( - CAST(( - 100.0 * ( - COALESCE(sum_expr_3, 0) - LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year) - ) - ) AS REAL) / LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year), - 2 - ) AS pct_bought_change, - ROUND( - CAST(( - 100.0 * ( - COALESCE(sum_n_rows, 0) - LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year) - ) - ) AS REAL) / LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year), - 2 - ) AS pct_incident_change, - COALESCE(sum_expr_3, 0) AS n_devices, - COALESCE(sum_n_rows, 0) AS n_incidents, - year - FROM _t4 - WHERE - NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ) SELECT year AS yr, - cum_ir, - pct_bought_change, - pct_incident_change, - n_devices AS bought, - n_incidents AS incidents -FROM _t0 + ROUND( + CAST(SUM(COALESCE(sum_n_rows, 0)) OVER (ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS REAL) / SUM(COALESCE(sum_expr_3, 0)) OVER (ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + CAST(( + 100.0 * ( + COALESCE(sum_expr_3, 0) - 
LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year) + ) + ) AS REAL) / LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year), + 2 + ) AS pct_bought_change, + ROUND( + CAST(( + 100.0 * ( + COALESCE(sum_n_rows, 0) - LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year) + ) + ) AS REAL) / LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year), + 2 + ) AS pct_incident_change, + COALESCE(sum_expr_3, 0) AS bought, + COALESCE(sum_n_rows, 0) AS incidents +FROM _t2 +WHERE + NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ORDER BY year diff --git a/tests/test_sql_refsols/time_threshold_reached_ansi.sql b/tests/test_sql_refsols/time_threshold_reached_ansi.sql index 08eebf4aa..853086170 100644 --- a/tests/test_sql_refsols/time_threshold_reached_ansi.sql +++ b/tests/test_sql_refsols/time_threshold_reached_ansi.sql @@ -3,8 +3,8 @@ WITH _t3 AS ( ( 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) / SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP))) AS pct_of_day, - DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) AS txn_day_1, - sbtxdatetime + sbtxdatetime, + DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) AS txn_day FROM main.sbtransaction WHERE EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) = 2023 @@ -15,7 +15,7 @@ WITH _t3 AS ( WHERE pct_of_day >= 50.0 QUALIFY - ROW_NUMBER() OVER (PARTITION BY txn_day_1 ORDER BY pct_of_day NULLS LAST) = 1 + ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY pct_of_day NULLS LAST) = 1 ) SELECT sbtxdatetime AS date_time diff --git a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql index c69bb3b85..5e47efb24 100644 --- a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql +++ b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql @@ -3,15 +3,15 @@ WITH _t3 AS ( CAST(( 100.0 * SUM(sbtxshares) OVER (PARTITION BY 
DATE(sbtxdatetime, 'start of day') ORDER BY sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) AS REAL) / SUM(sbtxshares) OVER (PARTITION BY DATE(sbtxdatetime, 'start of day')) AS pct_of_day, - DATE(sbtxdatetime, 'start of day') AS txn_day_1, - sbtxdatetime + sbtxdatetime, + DATE(sbtxdatetime, 'start of day') AS txn_day FROM main.sbtransaction WHERE CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) = 2023 ), _t AS ( SELECT sbtxdatetime, - ROW_NUMBER() OVER (PARTITION BY txn_day_1 ORDER BY pct_of_day) AS _w + ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY pct_of_day) AS _w FROM _t3 WHERE pct_of_day >= 50.0 diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index f76f36d96..53460ad26 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -35,9 +35,7 @@ SELECT COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON ( - COALESCE(_s8.sum_metric, 0) * 0.0001 - ) < COALESCE(_s9.sum_expr_2, 0) + ON COALESCE(_s8.sum_metric, 0) * 0.0001 < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index f76f36d96..53460ad26 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -35,9 +35,7 @@ SELECT COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON ( - COALESCE(_s8.sum_metric, 0) * 0.0001 - ) < COALESCE(_s9.sum_expr_2, 0) + ON COALESCE(_s8.sum_metric, 0) * 0.0001 < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q15_ansi.sql b/tests/test_sql_refsols/tpch_q15_ansi.sql index 8bee61545..fdfcf8468 100644 --- a/tests/test_sql_refsols/tpch_q15_ansi.sql +++ b/tests/test_sql_refsols/tpch_q15_ansi.sql @@ -1,4 +1,4 @@ -WITH _t6 AS ( +WITH _t5 AS ( SELECT l_discount, l_extendedprice, @@ -14,7 +14,7 @@ WITH _t6 AS ( 1 - l_discount )) AS 
sum_expr_2, l_suppkey - FROM _t6 + FROM _t5 GROUP BY l_suppkey ), _s2 AS ( @@ -28,11 +28,8 @@ WITH _t6 AS ( SUM(l_extendedprice * ( 1 - l_discount )) AS sum_expr_3, - SUM(l_extendedprice * ( - 1 - l_discount - )) AS sum_expr_3_1, l_suppkey - FROM _t6 + FROM _t5 GROUP BY l_suppkey ) @@ -45,7 +42,7 @@ SELECT FROM _s2 AS _s2 CROSS JOIN tpch.supplier AS supplier JOIN _s5 AS _s5 - ON _s2.max_revenue = COALESCE(_s5.sum_expr_3_1, 0) + ON _s2.max_revenue = COALESCE(_s5.sum_expr_3, 0) AND _s5.l_suppkey = supplier.s_suppkey ORDER BY s_suppkey diff --git a/tests/test_sql_refsols/tpch_q15_sqlite.sql b/tests/test_sql_refsols/tpch_q15_sqlite.sql index 25340d6a8..4b6f85ec0 100644 --- a/tests/test_sql_refsols/tpch_q15_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q15_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t6 AS ( +WITH _t5 AS ( SELECT l_discount, l_extendedprice, @@ -13,7 +13,7 @@ WITH _t6 AS ( 1 - l_discount )) AS sum_expr_2, l_suppkey - FROM _t6 + FROM _t5 GROUP BY l_suppkey ), _s2 AS ( @@ -27,11 +27,8 @@ WITH _t6 AS ( SUM(l_extendedprice * ( 1 - l_discount )) AS sum_expr_3, - SUM(l_extendedprice * ( - 1 - l_discount - )) AS sum_expr_3_1, l_suppkey - FROM _t6 + FROM _t5 GROUP BY l_suppkey ) @@ -44,7 +41,7 @@ SELECT FROM _s2 AS _s2 CROSS JOIN tpch.supplier AS supplier JOIN _s5 AS _s5 - ON _s2.max_revenue = COALESCE(_s5.sum_expr_3_1, 0) + ON _s2.max_revenue = COALESCE(_s5.sum_expr_3, 0) AND _s5.l_suppkey = supplier.s_suppkey ORDER BY s_suppkey diff --git a/tests/test_sql_refsols/tpch_q5_ansi.sql b/tests/test_sql_refsols/tpch_q5_ansi.sql index 200918b17..c97200d61 100644 --- a/tests/test_sql_refsols/tpch_q5_ansi.sql +++ b/tests/test_sql_refsols/tpch_q5_ansi.sql @@ -27,4 +27,6 @@ JOIN _s11 AS _s11 GROUP BY nation.n_nationkey ORDER BY - revenue DESC + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) DESC diff --git a/tests/test_sql_refsols/tpch_q5_sqlite.sql b/tests/test_sql_refsols/tpch_q5_sqlite.sql index d65176b51..6cea8d423 100644 --- 
a/tests/test_sql_refsols/tpch_q5_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q5_sqlite.sql @@ -27,4 +27,6 @@ JOIN _s11 AS _s11 GROUP BY nation.n_nationkey ORDER BY - revenue DESC + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) DESC diff --git a/tests/test_sql_refsols/tpch_q9_ansi.sql b/tests/test_sql_refsols/tpch_q9_ansi.sql index 12fe871b0..99390991a 100644 --- a/tests/test_sql_refsols/tpch_q9_ansi.sql +++ b/tests/test_sql_refsols/tpch_q9_ansi.sql @@ -1,30 +1,37 @@ -SELECT - nation.n_name AS NATION, - EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) AS O_YEAR, - COALESCE( +WITH _t0 AS ( + SELECT SUM( lineitem.l_extendedprice * ( 1 - lineitem.l_discount ) - partsupp.ps_supplycost * lineitem.l_quantity - ), - 0 - ) AS AMOUNT -FROM tpch.lineitem AS lineitem -JOIN tpch.part AS part - ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' -JOIN tpch.supplier AS supplier - ON lineitem.l_suppkey = supplier.s_suppkey -JOIN tpch.nation AS nation - ON nation.n_nationkey = supplier.s_nationkey -JOIN tpch.orders AS orders - ON lineitem.l_orderkey = orders.o_orderkey -JOIN tpch.partsupp AS partsupp - ON lineitem.l_partkey = partsupp.ps_partkey - AND lineitem.l_suppkey = partsupp.ps_suppkey -GROUP BY - nation.n_name, - EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) + ) AS sum_value, + nation.n_name, + EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) AS o_year + FROM tpch.lineitem AS lineitem + JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' + JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey + JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey + JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey + JOIN tpch.partsupp AS partsupp + ON lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey + GROUP BY + nation.n_name, + EXTRACT(YEAR FROM CAST(orders.o_orderdate AS 
DATETIME)) + ORDER BY + n_name, + o_year DESC + LIMIT 10 +) +SELECT + n_name AS NATION, + o_year AS O_YEAR, + COALESCE(sum_value, 0) AS AMOUNT +FROM _t0 ORDER BY - nation.n_name, + n_name, o_year DESC -LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q9_sqlite.sql b/tests/test_sql_refsols/tpch_q9_sqlite.sql index 37e726db9..ac17a15d1 100644 --- a/tests/test_sql_refsols/tpch_q9_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q9_sqlite.sql @@ -1,30 +1,37 @@ -SELECT - nation.n_name AS NATION, - CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) AS O_YEAR, - COALESCE( +WITH _t0 AS ( + SELECT SUM( lineitem.l_extendedprice * ( 1 - lineitem.l_discount ) - partsupp.ps_supplycost * lineitem.l_quantity - ), - 0 - ) AS AMOUNT -FROM tpch.lineitem AS lineitem -JOIN tpch.part AS part - ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' -JOIN tpch.supplier AS supplier - ON lineitem.l_suppkey = supplier.s_suppkey -JOIN tpch.nation AS nation - ON nation.n_nationkey = supplier.s_nationkey -JOIN tpch.orders AS orders - ON lineitem.l_orderkey = orders.o_orderkey -JOIN tpch.partsupp AS partsupp - ON lineitem.l_partkey = partsupp.ps_partkey - AND lineitem.l_suppkey = partsupp.ps_suppkey -GROUP BY - nation.n_name, - CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) + ) AS sum_value, + nation.n_name, + CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) AS o_year + FROM tpch.lineitem AS lineitem + JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' + JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey + JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey + JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey + JOIN tpch.partsupp AS partsupp + ON lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey + GROUP BY + nation.n_name, + CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) + ORDER BY + n_name, + o_year DESC + LIMIT 10 +) 
+SELECT + n_name AS NATION, + o_year AS O_YEAR, + COALESCE(sum_value, 0) AS AMOUNT +FROM _t0 ORDER BY - nation.n_name, + n_name, o_year DESC -LIMIT 10 From a678974407712fa44b3d48d5a61028bb3cd8d0a1 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 03:05:11 -0400 Subject: [PATCH 012/143] Resolving conflicts --- pydough/pydough_operators/type_inference/type_verifier.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pydough/pydough_operators/type_inference/type_verifier.py b/pydough/pydough_operators/type_inference/type_verifier.py index 79d2900b5..092621c49 100644 --- a/pydough/pydough_operators/type_inference/type_verifier.py +++ b/pydough/pydough_operators/type_inference/type_verifier.py @@ -15,10 +15,9 @@ from abc import ABC, abstractmethod from typing import Any -from pydough.errors import PyDoughQDAGException +from pydough.errors import PyDoughMetadataException, PyDoughQDAGException from pydough.errors.error_utils import ( NoExtraKeys, - PyDoughMetadataException, extract_array, extract_integer, extract_string, From d7ec696fe9e841a6b567349ecb2cb3bbabdf1d2a Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 03:13:36 -0400 Subject: [PATCH 013/143] Adding extra round of bubbling --- pydough/conversion/relational_converter.py | 3 ++ tests/test_plan_refsols/aggregate_semi.txt | 13 +++--- .../aggregate_then_backref.txt | 11 +++-- .../aggregation_analytics_2.txt | 27 ++++++------ .../aggregation_analytics_3.txt | 27 ++++++------ tests/test_plan_refsols/bad_child_reuse_1.txt | 15 ++++--- tests/test_plan_refsols/bad_child_reuse_5.txt | 11 +++-- tests/test_plan_refsols/common_prefix_al.txt | 4 +- tests/test_plan_refsols/common_prefix_aq.txt | 18 ++++---- tests/test_plan_refsols/common_prefix_b.txt | 11 +++-- tests/test_plan_refsols/common_prefix_c.txt | 21 +++++----- tests/test_plan_refsols/common_prefix_d.txt | 41 +++++++++---------- tests/test_plan_refsols/common_prefix_f.txt | 11 +++-- 
tests/test_plan_refsols/common_prefix_g.txt | 11 +++-- tests/test_plan_refsols/common_prefix_h.txt | 21 +++++----- tests/test_plan_refsols/common_prefix_j.txt | 9 ++-- tests/test_plan_refsols/common_prefix_k.txt | 9 ++-- tests/test_plan_refsols/common_prefix_l.txt | 31 +++++++------- tests/test_plan_refsols/common_prefix_m.txt | 36 ++++++++-------- tests/test_plan_refsols/common_prefix_o.txt | 15 ++++--- tests/test_plan_refsols/common_prefix_p.txt | 17 ++++---- tests/test_plan_refsols/common_prefix_q.txt | 4 +- tests/test_plan_refsols/common_prefix_r.txt | 4 +- tests/test_plan_refsols/common_prefix_v.txt | 11 +++-- tests/test_plan_refsols/common_prefix_w.txt | 13 +++--- tests/test_plan_refsols/correl_14.txt | 23 +++++------ tests/test_plan_refsols/correl_15.txt | 30 +++++++------- tests/test_plan_refsols/correl_24.txt | 4 +- tests/test_plan_refsols/correl_26.txt | 21 +++++----- tests/test_plan_refsols/correl_27.txt | 19 ++++----- tests/test_plan_refsols/correl_28.txt | 15 ++++--- tests/test_plan_refsols/correl_29.txt | 33 ++++++++------- tests/test_plan_refsols/correl_30.txt | 23 +++++------ tests/test_plan_refsols/correl_31.txt | 27 ++++++------ tests/test_plan_refsols/correl_32.txt | 4 +- tests/test_plan_refsols/correl_34.txt | 15 ++++--- .../count_cust_supplier_nation_combos.txt | 13 +++--- .../customer_largest_order_deltas.txt | 4 +- .../customer_most_recent_orders.txt | 4 +- .../test_plan_refsols/deep_best_analysis.txt | 4 +- .../epoch_culture_events_info.txt | 30 ++++++-------- tests/test_plan_refsols/exponentiation.txt | 4 +- tests/test_plan_refsols/floor_and_ceil_2.txt | 4 +- ...lineitems_access_cust_supplier_nations.txt | 17 ++++---- .../lines_shipping_vs_customer_region.txt | 22 +++++----- .../mostly_positive_accounts_per_nation3.txt | 13 +++--- ...ple_simple_aggregations_multiple_calcs.txt | 17 ++++---- .../num_positive_accounts_per_nation.txt | 13 +++--- .../orders_versus_first_orders.txt | 4 +- tests/test_plan_refsols/part_reduced_size.txt | 11 +++-- 
.../parts_quantity_increase_95_96.txt | 4 +- .../rank_nations_per_region_by_customers.txt | 4 +- ...rank_parts_per_supplier_region_by_size.txt | 4 +- tests/test_plan_refsols/singular4.txt | 15 ++++--- tests/test_plan_refsols/singular7.txt | 29 +++++++------ .../sqlite_udf_count_epsilon.txt | 17 ++++---- .../sqlite_udf_covar_pop.txt | 17 ++++---- .../test_plan_refsols/sqlite_udf_decode3.txt | 9 ++-- .../sqlite_udf_format_datetime.txt | 7 ++-- tests/test_plan_refsols/sqlite_udf_gcat.txt | 5 +-- tests/test_plan_refsols/sqlite_udf_nval.txt | 9 ++-- .../sqlite_udf_percent_positive.txt | 24 +++++------ tests/test_plan_refsols/sqlite_udf_relmin.txt | 11 +++-- .../test_plan_refsols/supplier_best_part.txt | 14 +++---- .../supplier_pct_national_qty.txt | 4 +- .../test_plan_refsols/suppliers_bal_diffs.txt | 4 +- ...chnograph_country_combination_analysis.txt | 4 +- ...hnograph_incident_rate_by_release_year.txt | 9 ++-- .../technograph_monthly_incident_rate.txt | 33 ++++++++------- .../technograph_most_unreliable_products.txt | 4 +- ...umulative_incident_rate_goldcopperstar.txt | 29 +++++++------ ..._year_cumulative_incident_rate_overall.txt | 19 ++++----- tests/test_plan_refsols/tpch_q10.txt | 25 ++++++----- tests/test_plan_refsols/tpch_q2.txt | 11 +++-- tests/test_plan_refsols/tpch_q20.txt | 15 ++++--- tests/test_plan_refsols/tpch_q5.txt | 25 ++++++----- .../various_aggfuncs_simple.txt | 7 ++-- .../window_sliding_frame_relsize.txt | 4 +- .../window_sliding_frame_relsum.txt | 4 +- .../years_months_days_hours_datediff.txt | 4 +- tests/test_sql_refsols/datediff_ansi.sql | 10 ++--- tests/test_sql_refsols/datediff_sqlite.sql | 10 ++--- .../defog_broker_basic3_ansi.sql | 14 +++---- .../defog_broker_basic3_sqlite.sql | 14 +++---- .../defog_dealership_basic10_ansi.sql | 18 ++++---- .../defog_dealership_basic10_sqlite.sql | 18 ++++---- .../defog_dealership_basic5_ansi.sql | 12 +++--- .../defog_dealership_basic5_sqlite.sql | 12 +++--- .../defog_dealership_basic8_ansi.sql | 18 ++++---- 
.../defog_dealership_basic8_sqlite.sql | 18 ++++---- .../defog_ewallet_basic10_ansi.sql | 14 +++---- .../defog_ewallet_basic10_sqlite.sql | 14 +++---- .../defog_ewallet_basic8_ansi.sql | 12 +++--- .../defog_ewallet_basic8_sqlite.sql | 12 +++--- .../epoch_culture_events_info_ansi.sql | 18 ++++---- .../epoch_culture_events_info_sqlite.sql | 18 ++++---- .../floor_and_ceil_2_ansi.sql | 10 ++--- .../floor_and_ceil_2_sqlite.sql | 24 ++++------- .../sqlite_udf_count_epsilon_sqlite.sql | 8 ++-- .../sqlite_udf_decode3_sqlite.sql | 16 ++++++-- .../sqlite_udf_format_datetime_sqlite.sql | 13 +++++- .../sqlite_udf_gcat_sqlite.sql | 16 ++------ .../sqlite_udf_nval_sqlite.sql | 32 +++++---------- .../sqlite_udf_percent_positive_sqlite.sql | 22 ++++------ tests/test_sql_refsols/tpch_q20_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 4 +- 106 files changed, 717 insertions(+), 802 deletions(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index d4e43f901..66754ba96 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1456,6 +1456,9 @@ def optimize_relational_tree( # Step 10: re-run projection merging, without pushing into joins. root = confirm_root(merge_projects(root, push_into_joins=False)) + # Step 8: re-run column bubbling + root = bubble_column_names(root) + # Step 11: re-run column pruning. 
root = ColumnPruner().prune_unused_columns(root) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index f8dbb71a3..8b74b522a 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,9 +1,8 @@ ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(n_rows, 0:numeric)), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - PROJECT(columns={'avg_p_retailprice_1': avg_p_retailprice, 'n_rows': n_rows, 'ps_suppkey': ps_suppkey, 'sum_p_retailprice': sum_p_retailprice}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index 5a6627a0f..8fa56b648 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,8 +1,7 @@ ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - PROJECT(columns={'o_orderkey_1': o_orderkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 365ac3d0e..a3fc5b678 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,18 +1,17 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', revenue_generated)], orderings=[(revenue_generated):asc_first, (p_name):asc_first]) LIMIT(limit=Literal(value=4, type=NumericType()), columns={'p_name': p_name, 'revenue_generated': revenue_generated}, orderings=[(revenue_generated):asc_first, (p_name):asc_first]) PROJECT(columns={'p_name': p_name, 'revenue_generated': ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)}) - JOIN(condition=t0.anything_ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - PROJECT(columns={'anything_ps_partkey_1': anything_ps_partkey, 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 
'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & 
t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 82e33e815..8071a16f2 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,18 +1,17 @@ ROOT(columns=[('part_name', p_name), 
('revenue_ratio', revenue_ratio)], orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) LIMIT(limit=Literal(value=3, type=NumericType()), columns={'p_name': p_name, 'revenue_ratio': revenue_ratio}, orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) PROJECT(columns={'p_name': p_name, 'revenue_ratio': ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)}) - JOIN(condition=t0.anything_ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - PROJECT(columns={'anything_ps_partkey_1': anything_ps_partkey, 'sum_l_quantity': sum_l_quantity, 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - 
FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index e5ed44130..df12efecf 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('cust_key', c_custkey_1), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey_1, 'n_rows': n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey, 'n_rows': n_rows_1, 'n_rows_1': n_rows}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': 
t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index fe46e16ce..98c75eec1 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,9 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey_1, 'n_rows': n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': 
t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index c20c51d81..240fcb590 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders_1, 'n_rows': t0.n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders_1': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t0.n_rows}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index e8d037b92..9f93ea84e 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ b/tests/test_plan_refsols/common_prefix_aq.txt @@ -1,15 +1,13 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier', s_name), ('best_part', p_name), ('best_quantity', ps_availqty)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t1.s_name_1}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(n_name):asc_last], allow_ties=False) == 1:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 
's_name_1': s_name, 's_nationkey_1': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) - FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 'ps_suppkey_1': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) + FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': 
t0.ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 6fdd01f9f..4d1bb2447 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -3,12 +3,11 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_cust SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0), 'n_suppliers': SUM(n_suppliers)}) PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'n_suppliers': t1.n_suppliers}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': 
t1.n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 34dc71b2e..953b939f0 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,18 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_sum_sum_expr_18_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': 
sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index cb55fd26f..6676f3f68 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,29 +1,26 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', DEFAULT_TO(sum_sum_expr_7, 0:numeric)), ('n_orders_95', DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': 
t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_agg_29': sum_agg_29, 'sum_n_rows_1': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'agg_29': agg_29, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_expr_10': t0.sum_expr_10_1, 'sum_expr_7': t0.sum_expr_7_1, 'sum_n_rows': t0.sum_n_rows_1}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_expr_10_1': sum_expr_10, 'sum_expr_7_1': sum_expr_7, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': 
t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey_1, 'expr_10': t0.n_rows_1, 'expr_7': t0.expr_7_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'expr_7_1': expr_7, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey_1, 'expr_7': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey_1': c_nationkey, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': 
SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows, 'sum_n_rows_2': sum_n_rows_2}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_2': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index 20bfbcac7..dd8ca64e5 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -3,12 +3,11 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_sum_n_rows': SUM(sum_n_rows)}) PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t1.sum_n_rows}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': 
t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 4629f2fa8..848a95bb5 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -3,12 +3,11 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(agg_2)}) PROJECT(columns={'agg_2': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'n_suppliers': t1.n_suppliers}) - 
PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 1a8005c46..cda098921 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,18 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 
'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0_1, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_0': sum_agg_0, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + PROJECT(columns={'n_nations': sum_agg_0, 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 
'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_18_0': 
1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_j.txt b/tests/test_plan_refsols/common_prefix_j.txt index db722c70a..a338a9b8e 100644 --- a/tests/test_plan_refsols/common_prefix_j.txt +++ b/tests/test_plan_refsols/common_prefix_j.txt @@ -1,8 +1,7 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name_1, 'r_name': t1.r_name_1}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_k.txt b/tests/test_plan_refsols/common_prefix_k.txt index 28f7e96d9..4c3a0abf6 100644 --- 
a/tests/test_plan_refsols/common_prefix_k.txt +++ b/tests/test_plan_refsols/common_prefix_k.txt @@ -1,8 +1,7 @@ ROOT(columns=[('cust_name', c_name), ('region_name', r_name), ('nation_name', n_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name_1, 'r_name': t1.r_name_1}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index d8911515c..027e4d8e7 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,19 +1,18 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), 
('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal_1, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows_1, 'sum_s_acctbal': t1.sum_s_acctbal_1}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows_1': n_rows, 'sum_s_acctbal_1': sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': 
n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, 
columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 24dfe2447..f91eee7b1 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,20 +1,18 @@ -ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal_1), ('selected_suppliers_max', max_s_acctbal_1), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name_1)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_rows': n_rows, 
'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) +ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name)], orderings=[(c_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) - JOIN(condition=t0.n_regionkey_1 == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal_1, 'min_s_acctbal': t0.min_s_acctbal_1, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - PROJECT(columns={'avg_s_acctbal': 
avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git 
a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 14fc1ed20..907f12e2a 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -10,14 +10,13 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5_1, 'l_orderkey': t0.l_orderkey_1, 'l_suppkey': t0.l_suppkey_1, 'p_retailprice': t1.p_retailprice}) - PROJECT(columns={'agg_5_1': agg_5, 'l_orderkey_1': l_orderkey, 'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 
'p_partkey': p_partkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 9b9ef4933..3d779be70 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(ordering_3_1):asc_first, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3_1': ordering_3}, orderings=[(ordering_3):asc_first, 
(c_name):asc_first]) +ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(ordering_3):asc_first, (c_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) PROJECT(columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) - JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_name_1': c_name, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) 
+ AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt b/tests/test_plan_refsols/common_prefix_q.txt index 634dd4f69..e7f6f5fd0 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_spent', total_spent), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], orderings=[(total_spent):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice_1, 'max_p_name': max_p_name_1, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_l_extendedprice_1': max_l_extendedprice, 'max_p_name_1': max_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': 
t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 32ebc4101..355dc9ad3 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_anything_l_extendedprice), ('total_spent', total_spent)], orderings=[(total_spent):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice_1, 'max_anything_p_name': max_anything_p_name_1, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice_1': max_anything_anything_l_extendedprice, 'max_anything_p_name_1': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 
'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) diff --git a/tests/test_plan_refsols/common_prefix_v.txt b/tests/test_plan_refsols/common_prefix_v.txt index a1733f226..2c10e35a7 100644 --- a/tests/test_plan_refsols/common_prefix_v.txt +++ b/tests/test_plan_refsols/common_prefix_v.txt @@ -1,9 +1,8 @@ ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name_1}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_w.txt b/tests/test_plan_refsols/common_prefix_w.txt index 885853056..4b633dbd0 100644 --- a/tests/test_plan_refsols/common_prefix_w.txt +++ b/tests/test_plan_refsols/common_prefix_w.txt @@ -1,10 +1,9 @@ ROOT(columns=[('key', o_orderkey), ('cust_nation_name', n_name)], orderings=[(o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'o_orderkey': o_orderkey}, orderings=[(o_orderkey):asc_first]) - JOIN(condition=t0.o_custkey == t1.c_custkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - PROJECT(columns={'c_custkey_1': c_custkey, 'n_name_1': n_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) - FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) + FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 3e1783235..c5da9fde3 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -4,17 +4,16 @@ ROOT(columns=[('n', n)], orderings=[]) FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric, columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': 
s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index c94a329b1..aa5568ea8 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -4,21 +4,19 @@ ROOT(columns=[('n', n_rows)], orderings=[]) FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric, columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price_1, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': 
t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'global_avg_price_1': global_avg_price, 'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price_1, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'global_avg_price_1': global_avg_price, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - 
SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + 
SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index eb1ec52c0..af102dee8 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,8 +1,8 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orders_in_range)], orderings=[(year_7):asc_first, (month_6):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'month_6': ANYTHING(month), 'n_orders_in_range': COUNT(), 'year_7': ANYTHING(year)}) FILTER(condition=MONOTONIC(prev_month_avg_price, o_totalprice, avg_o_totalprice) | MONOTONIC(avg_o_totalprice, o_totalprice, prev_month_avg_price), columns={'month': month, 'year': year}) - JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice_1, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) - PROJECT(columns={'avg_o_totalprice_1': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) + JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) + 
PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index 7aefdb4d0..e7c7cde56 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -3,17 +3,16 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_ JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) PROJECT(columns={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_selected_purchases': 1:numeric}) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'o_orderkey_1': o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, 
columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 4ef8b131d..ac9583af3 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -4,16 +4,15 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_regionkey': t0.n_regionkey_1}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'o_orderkey_1': o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': 
t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 1ee36d030..3f6839aaf 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -4,14 +4,13 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_regionkey': t0.n_regionkey_1}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'o_orderkey_1': o_orderkey}) - JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': 
o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 0c9acaa5b..f3b0080b7 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,21 +1,20 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', DEFAULT_TO(n_rows, 0:numeric)), ('n_above_avg_suppliers', DEFAULT_TO(n_rows_1, 0:numeric)), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.anything_n_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name_1, 'anything_n_regionkey': t0.anything_n_regionkey_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'min_c_acctbal': t0.min_c_acctbal_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) - PROJECT(columns={'anything_n_name_1': anything_n_name, 'anything_n_nationkey_1': anything_n_nationkey, 'anything_n_regionkey_1': anything_n_regionkey, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows': n_rows}) - JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) - FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 
'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 
'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) + FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) + FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 761246fb9..9f4248442 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -3,18 +3,17 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(region_name), 'n_rows': COUNT()}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': LOWER(r_name)}) FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal_1, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) - PROJECT(columns={'avg_cust_acctbal_1': avg_cust_acctbal, 'n_name_1': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) - 
JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': 
t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index 1cfd16b15..c868fedf4 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,18 +1,17 @@ ROOT(columns=[('nation_name', nation_name), ('mean_rev', mean_rev), ('median_rev', median_rev)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(revenue), 'median_rev': MEDIAN(revenue), 'nation_name': ANYTHING(n_name)}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': 
t0.n_name_1, 'n_nationkey': t0.n_nationkey}) - PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name_1': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': 
l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, 
columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/correl_32.txt b/tests/test_plan_refsols/correl_32.txt index a88f51fdb..e1c8d129c 100644 --- a/tests/test_plan_refsols/correl_32.txt +++ b/tests/test_plan_refsols/correl_32.txt @@ -1,6 +1,6 @@ ROOT(columns=[('customer_name', anything_c_name), ('delta', delta)], orderings=[(delta):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name_1, 'delta': delta}, orderings=[(delta):asc_first]) - PROJECT(columns={'anything_c_name_1': anything_c_name, 'delta': ABS(anything_c_acctbal - median_s_acctbal)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name, 'delta': delta}, orderings=[(delta):asc_first]) + PROJECT(columns={'anything_c_name': anything_c_name, 'delta': ABS(anything_c_acctbal - median_s_acctbal)}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_name': ANYTHING(c_name), 'median_s_acctbal': MEDIAN(s_acctbal)}) JOIN(condition=SLICE(t1.s_phone, -1:numeric, None:unknown, None:unknown) == SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 's_acctbal': t1.s_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index 1bbb483bf..4ee636ab6 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ 
b/tests/test_plan_refsols/correl_34.txt @@ -5,15 +5,14 @@ ROOT(columns=[('n', n)], orderings=[]) JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'n_name_1': n_name, 's_suppkey_1': s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': 
s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=l_linestatus == 'F':string & l_returnflag == 'N':string, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_linestatus': l_linestatus, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_returnflag': l_returnflag, 'l_suppkey': l_suppkey}) FILTER(condition=YEAR(o_orderdate) >= 1995:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index edd89b822..21bb4c0e6 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -7,13 +7,12 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s JOIN(condition=t0.l_partkey == t1.ps_partkey & 
t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': year}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) PROJECT(columns={'agg_0': 1:numeric, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'sum_l_extendedprice': sum_l_extendedprice, 'year': YEAR(o_orderdate)}) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - PROJECT(columns={'n_name_1': n_name, 'o_orderdate': o_orderdate, 'o_orderkey_1': o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index b7b448977..1fa05bd28 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('largest_diff', largest_diff)], orderings=[(largest_diff):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name_1, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) - PROJECT(columns={'c_name_1': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) + PROJECT(columns={'c_name': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 
'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index a6322fc0c..52700565c 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_recent_value', total_recent_value)], orderings=[(total_recent_value):desc_last]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name_1, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) - PROJECT(columns={'c_name_1': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) + PROJECT(columns={'c_name': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index dedd53aad..b1b75726f 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,6 +1,6 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', c_custkey), ('c_bal', c_acctbal), ('cr_bal', cr_bal), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', cg_key)], orderings=[(n_name):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': 
c_acctbal_1, 'c_custkey': c_custkey_1, 'cg_key': key_54, 'cr_bal': account_balance_21, 'n_name': n_name, 'ps_availqty': ps_availqty_1, 'ps_partkey': ps_partkey_1, 'r_name': r_name_1, 's_suppkey': s_suppkey_1}, orderings=[(n_name):asc_first]) - PROJECT(columns={'account_balance_21': account_balance_21, 'c_acctbal_1': c_acctbal, 'c_custkey_1': c_custkey, 'key_54': key_54, 'n_name': n_name, 'ps_availqty_1': ps_availqty, 'ps_partkey_1': ps_partkey, 'r_name_1': r_name, 's_suppkey_1': s_suppkey}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cg_key': cg_key, 'cr_bal': cr_bal, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}, orderings=[(n_name):asc_first]) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cg_key': key_54, 'cr_bal': account_balance_21, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'key_54': t1.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index 5bbc46de5..d2b29d459 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,18 +1,14 @@ -ROOT(columns=[('event_name', ev_name_1), ('era_name', er_name_1), ('event_year', YEAR(ev_dt)), ('season_name', s_name_1), ('tod', t_name_1)], orderings=[(ev_dt):asc_first]) - LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name_1': er_name, 'ev_dt': ev_dt, 'ev_name_1': ev_name, 's_name_1': s_name, 't_name_1': t_name}, orderings=[(ev_dt):asc_first]) - JOIN(condition=t0.ev_key_1 == t1.ev_key_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_name': t0.ev_name_1, 's_name': t0.s_name_1, 't_name': t1.t_name_1}) - PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key_1': ev_key, 'ev_name_1': ev_name, 's_name_1': s_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name_1, 's_name': t1.s_name_1}) - PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key': ev_key, 'ev_name_1': ev_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) - FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - PROJECT(columns={'ev_key': ev_key, 's_name_1': 
s_name}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - PROJECT(columns={'ev_key_1': ev_key, 't_name_1': t_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) +ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEAR(ev_dt)), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first]) + LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) + SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) 
== t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/exponentiation.txt b/tests/test_plan_refsols/exponentiation.txt index d87d0c19e..0c1fec6b1 100644 --- a/tests/test_plan_refsols/exponentiation.txt +++ b/tests/test_plan_refsols/exponentiation.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('low_square', low_square_1), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(low_square_1):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'low_square_1': low_square, 'sbDpLow': sbDpLow}, orderings=[(low_square):asc_first]) +ROOT(columns=[('low_square', low_square), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(low_square):asc_first]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'low_square': low_square, 'sbDpLow': sbDpLow}, orderings=[(low_square):asc_first]) PROJECT(columns={'low_square': sbDpLow ** 2:numeric, 'sbDpLow': sbDpLow}) SCAN(table=main.sbDailyPrice, columns={'sbDpLow': sbDpLow}) diff --git a/tests/test_plan_refsols/floor_and_ceil_2.txt b/tests/test_plan_refsols/floor_and_ceil_2.txt index 2a4dcec3a..29b9464c1 100644 --- a/tests/test_plan_refsols/floor_and_ceil_2.txt +++ 
b/tests/test_plan_refsols/floor_and_ceil_2.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', FLOOR(ps_availqty)), ('total_cost', total_cost_1)], orderings=[(total_cost_1):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost_1': total_cost}, orderings=[(total_cost):desc_last]) +ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', FLOOR(ps_availqty)), ('total_cost', total_cost)], orderings=[(total_cost):desc_last]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': total_cost}, orderings=[(total_cost):desc_last]) PROJECT(columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': CEIL(ps_supplycost * FLOOR(ps_availqty))}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt index 179adc4d0..c9f1900fe 100644 --- a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt +++ b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt @@ -1,13 +1,12 @@ ROOT(columns=[('ship_year', YEAR(l_shipdate)), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', l_extendedprice * 1.0:numeric - l_discount)], orderings=[]) - JOIN(condition=t0.l_orderkey_1 == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name_1}) - PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 
'l_orderkey_1': l_orderkey, 'l_shipdate': l_shipdate, 'n_name_1': n_name}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt index 6f090bc1b..e06f975c8 100644 --- a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt +++ b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt @@ -1,16 +1,14 @@ ROOT(columns=[('order_year', YEAR(o_orderdate)), ('customer_region_name', r_name), ('customer_nation_name', n_name), ('supplier_region_name', supplier_region_name), ('nation_name', nation_name)], orderings=[]) - JOIN(condition=t0.l_partkey_1 == t1.ps_partkey & t0.l_suppkey_1 == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name_1, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name_1, 'supplier_region_name': t1.r_name}) - PROJECT(columns={'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey, 'n_name_1': n_name, 'o_orderdate': o_orderdate, 'r_name_1': r_name}) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name_1}) - PROJECT(columns={'n_name_1': n_name, 'o_orderdate': o_orderdate, 'o_orderkey_1': o_orderkey, 'r_name_1': r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) + 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index e2d259395..d185fd519 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,12 +1,11 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': 
n_name, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) PROJECT(columns={'n_name': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name_1, 'total_suppliers': t1.total_suppliers}) - PROJECT(columns={'count_s_suppkey': count_s_suppkey, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': 
COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index 6227e214c..2186c653c 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,11 +1,8 @@ ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal_1, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - PROJECT(columns={'avg_c_acctbal_1': avg_c_acctbal, 'max_c_acctbal_1': max_c_acctbal, 'n_nationkey': n_nationkey, 'sum_c_acctbal': sum_c_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - PROJECT(columns={'avg_c_acctbal_1': avg_c_acctbal, 'c_nationkey': c_nationkey, 'max_c_acctbal_1': max_c_acctbal, 'sum_c_acctbal': sum_c_acctbal}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 
'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 's_nationkey': s_nationkey, 'sum_s_acctbal': sum_s_acctbal}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt index 21297f633..7706c56b5 100644 --- a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt +++ b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt @@ -1,10 +1,9 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), 
('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name_1, 'total_suppliers': t1.total_suppliers}) - PROJECT(columns={'count_s_suppkey': count_s_suppkey, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git 
a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 7ac114d5f..254b7ce5c 100644 --- a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,6 +1,6 @@ ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', days_since_first_order)], orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'days_since_first_order': days_since_first_order, 'o_orderkey': o_orderkey_1}, orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'days_since_first_order': DATEDIFF('days':string, order_date_8, o_orderdate), 'o_orderkey_1': o_orderkey}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'days_since_first_order': days_since_first_order, 'o_orderkey': o_orderkey}, orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'days_since_first_order': DATEDIFF('days':string, order_date_8, o_orderdate), 'o_orderkey': o_orderkey}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index 93d6f9723..9bd706f37 100644 --- a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt 
@@ -1,8 +1,7 @@ -ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int_1), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'l_discount': l_discount, 'l_receiptdate': l_receiptdate, 'p_size': p_size, 'retail_price_int_1': retail_price_int}, orderings=[(l_discount):desc_last]) +ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'l_discount': l_discount, 'l_receiptdate': l_receiptdate, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(l_discount):desc_last]) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_size': t0.p_size, 'retail_price_int': t0.retail_price_int}) - PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int_1}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int_1': retail_price_int}, orderings=[(retail_price_int):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) + LIMIT(limit=Literal(value=2, 
type=NumericType()), columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) + PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 4573cc866..7d3cc9e81 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(ordering_2_1):desc_last, (p_name):asc_first]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'agg_1': agg_1, 'ordering_2_1': ordering_2, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) +ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(ordering_2):desc_last, (p_name):asc_first]) + LIMIT(limit=Literal(value=3, type=NumericType()), columns={'agg_1': agg_1, 'ordering_2': ordering_2, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) PROJECT(columns={'agg_1': agg_1, 'ordering_2': DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric), 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 085f35880..720bf6ef4 100644 --- a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('rank', rank)], orderings=[(rank):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name_1, 'rank': rank}, orderings=[(rank):asc_first]) - PROJECT(columns={'n_name_1': n_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'rank': rank}, orderings=[(rank):asc_first]) + PROJECT(columns={'n_name': n_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt index c21a1b136..9372d9bb7 100644 --- a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt +++ b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt @@ -1,6 +1,6 @@ ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', rank)], orderings=[(p_partkey):asc_first]) - LIMIT(limit=Literal(value=15, type=NumericType()), columns={'p_partkey': p_partkey, 'r_name': 
r_name_1, 'rank': rank}, orderings=[(p_partkey):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'r_name_1': r_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True)}) + LIMIT(limit=Literal(value=15, type=NumericType()), columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': rank}, orderings=[(p_partkey):asc_first]) + PROJECT(columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index d32e6626b..889ab189c 100644 --- a/tests/test_plan_refsols/singular4.txt +++ b/tests/test_plan_refsols/singular4.txt @@ -1,9 +1,8 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name_1, 'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_last]) - PROJECT(columns={'c_name_1': c_name, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) - FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 
'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_last]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 4da08ecfe..bb1b832a9 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,16 +1,15 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), 
columns={'n_orders': n_orders, 'p_name': p_name_1, 's_name': s_name}, orderings=[(n_orders):desc_last, (s_name):asc_first]) - PROJECT(columns={'n_orders': n_orders, 'p_name_1': p_name, 's_name': s_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) - FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_orders': n_orders, 'p_name': p_name, 's_name': s_name}, 
orderings=[(n_orders):desc_last, (s_name):asc_first]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) + FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt index d419ed393..5ed26b322 100644 --- a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt @@ 
-1,10 +1,9 @@ -ROOT(columns=[('name', r_name), ('n_cust', n_cust)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_cust': t1.n_cust, 'r_name': t0.r_name}) +ROOT(columns=[('name', r_name), ('n_cust', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=EPSILON(c_acctbal, avg_balance, avg_balance * 0.1:numeric), columns={'n_regionkey': n_regionkey}) - PROJECT(columns={'avg_balance': RELAVG(args=[c_acctbal], partition=[n_regionkey], order=[]), 'c_acctbal': c_acctbal, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=EPSILON(c_acctbal, avg_balance, avg_balance * 0.1:numeric), columns={'n_regionkey': n_regionkey}) + PROJECT(columns={'avg_balance': RELAVG(args=[c_acctbal], partition=[n_regionkey], order=[]), 'c_acctbal': c_acctbal, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': 
c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt index 93d84c026..03ef1ee24 100644 --- a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt +++ b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('cvp_ab_otp', cvp_ab_otp)], orderings=[(r_name):asc_first]) - PROJECT(columns={'cvp_ab_otp': ROUND(agg_0, 3:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, expr_1)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'expr_1': t1.expr_1, 'n_regionkey': t0.n_regionkey}) +ROOT(columns=[('region_name', r_name), ('cvp_ab_otp', ROUND(agg_0, 3:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, expr_1)}) + PROJECT(columns={'c_acctbal': c_acctbal, 'expr_1': o_totalprice / 1000000.0:numeric, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': 
n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - PROJECT(columns={'expr_1': o_totalprice / 1000000.0:numeric, 'o_custkey': o_custkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/sqlite_udf_decode3.txt b/tests/test_plan_refsols/sqlite_udf_decode3.txt index 0a5523694..f213966cc 100644 --- a/tests/test_plan_refsols/sqlite_udf_decode3.txt +++ b/tests/test_plan_refsols/sqlite_udf_decode3.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('key', o_orderkey), ('val', val)], orderings=[(o_orderkey):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderkey': o_orderkey, 'val': val}, orderings=[(o_orderkey):asc_first]) - PROJECT(columns={'o_orderkey': o_orderkey, 'val': DECODE3(INTEGER(SLICE(o_orderpriority, None:unknown, 1:numeric, None:unknown)), 1:numeric, 'A':string, 2:numeric, 'B':string, 3:numeric, 'C':string, 'D':string)}) - FILTER(condition=o_clerk == 'Clerk#000000951':string, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) 
+ROOT(columns=[('key', o_orderkey), ('val', DECODE3(INTEGER(SLICE(o_orderpriority, None:unknown, 1:numeric, None:unknown)), 1:numeric, 'A':string, 2:numeric, 'B':string, 3:numeric, 'C':string, 'D':string))], orderings=[(o_orderkey):asc_first]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}, orderings=[(o_orderkey):asc_first]) + FILTER(condition=o_clerk == 'Clerk#000000951':string, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/sqlite_udf_format_datetime.txt b/tests/test_plan_refsols/sqlite_udf_format_datetime.txt index b485de9d7..1a89dd509 100644 --- a/tests/test_plan_refsols/sqlite_udf_format_datetime.txt +++ b/tests/test_plan_refsols/sqlite_udf_format_datetime.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('key', o_orderkey), ('d1', d1), ('d2', d2), ('d3', d3), ('d4', d4)], orderings=[(o_totalprice):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'d1': d1, 'd2': d2, 'd3': d3, 'd4': d4, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):asc_first]) - PROJECT(columns={'d1': FORMAT_DATETIME('%d/%m/%Y':string, o_orderdate), 'd2': FORMAT_DATETIME('%Y:%j':string, o_orderdate), 'd3': INTEGER(FORMAT_DATETIME('%s':string, o_orderdate)), 'd4': INTEGER(FORMAT_DATETIME_VARIADIC('%Y%m%d':string, o_orderdate, '+39 days':string, 'start of month':string)), 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) +ROOT(columns=[('key', o_orderkey), ('d1', FORMAT_DATETIME('%d/%m/%Y':string, o_orderdate)), ('d2', FORMAT_DATETIME('%Y:%j':string, o_orderdate)), ('d3', INTEGER(FORMAT_DATETIME('%s':string, o_orderdate))), ('d4', 
INTEGER(FORMAT_DATETIME_VARIADIC('%Y%m%d':string, o_orderdate, '+39 days':string, 'start of month':string)))], orderings=[(o_totalprice):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):asc_first]) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/sqlite_udf_gcat.txt b/tests/test_plan_refsols/sqlite_udf_gcat.txt index a9899705e..9c7bd9d68 100644 --- a/tests/test_plan_refsols/sqlite_udf_gcat.txt +++ b/tests/test_plan_refsols/sqlite_udf_gcat.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('name', r_name), ('c1', c1), ('c2', c2), ('c3', c3)], orderings=[(r_name):asc_first]) - PROJECT(columns={'c1': GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):asc_last]), 'c2': GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):desc_first]), 'c3': GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):asc_last], cumulative=True), 'r_name': r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name}) +ROOT(columns=[('name', r_name), ('c1', GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):asc_last])), ('c2', GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):desc_first])), ('c3', GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):asc_last], cumulative=True))], orderings=[(r_name):asc_first]) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/sqlite_udf_nval.txt b/tests/test_plan_refsols/sqlite_udf_nval.txt index db0ba53e1..6435249df 100644 --- a/tests/test_plan_refsols/sqlite_udf_nval.txt +++ b/tests/test_plan_refsols/sqlite_udf_nval.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('rname', r_name), ('nname', n_name), ('v1', v1), ('v2', v2), ('v3', v3), ('v4', v4)], orderings=[(r_name):asc_first, (n_name):asc_first]) - PROJECT(columns={'n_name': n_name, 'r_name': 
r_name, 'v1': NVAL(args=[n_name, 3:numeric], partition=[], order=[(n_name):asc_last]), 'v2': NVAL(args=[n_name, 1:numeric], partition=[n_regionkey], order=[(n_name):asc_last]), 'v3': NVAL(args=[n_name, 2:numeric], partition=[n_regionkey], order=[(n_name):asc_last], frame=(1, None)), 'v4': NVAL(args=[n_name, 5:numeric], partition=[], order=[(n_name):asc_last], cumulative=True)}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) +ROOT(columns=[('rname', r_name), ('nname', n_name), ('v1', NVAL(args=[n_name, 3:numeric], partition=[], order=[(n_name):asc_last])), ('v2', NVAL(args=[n_name, 1:numeric], partition=[n_regionkey], order=[(n_name):asc_last])), ('v3', NVAL(args=[n_name, 2:numeric], partition=[n_regionkey], order=[(n_name):asc_last], frame=(1, None))), ('v4', NVAL(args=[n_name, 5:numeric], partition=[], order=[(n_name):asc_last], cumulative=True))], orderings=[(r_name):asc_first, (n_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt index 54a6ded26..b9b7d165d 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('name', r_name), ('pct_cust_positive', pct_cust_positive), ('pct_supp_positive', pct_supp_positive)], orderings=[(r_name):asc_first]) - 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'pct_cust_positive': t0.pct_cust_positive, 'pct_supp_positive': t1.pct_supp_positive, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'pct_cust_positive': t1.pct_cust_positive, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) +ROOT(columns=[('name', r_name), ('pct_cust_positive', ROUND(percentage_expr_2, 2:numeric)), ('pct_supp_positive', ROUND(percentage_expr_3, 2:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'pct_cust_positive': ROUND(percentage_expr_2, 2:numeric)}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(expr_2)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'expr_2': t1.expr_2, 'n_regionkey': t0.n_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(expr_2)}) + PROJECT(columns={'expr_2': POSITIVE(c_acctbal), 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - PROJECT(columns={'c_nationkey': c_nationkey, 'expr_2': POSITIVE(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': 
c_acctbal, 'c_nationkey': c_nationkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'pct_supp_positive': ROUND(percentage_expr_3, 2:numeric)}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_3': PERCENTAGE(expr_3)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'expr_3': t1.expr_3, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_3': PERCENTAGE(expr_3)}) + PROJECT(columns={'expr_3': POSITIVE(s_acctbal), 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_acctbal': t1.s_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - PROJECT(columns={'expr_3': POSITIVE(s_acctbal), 's_nationkey': s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_relmin.txt b/tests/test_plan_refsols/sqlite_udf_relmin.txt index 9d68f1b7f..a4606bb58 100644 --- a/tests/test_plan_refsols/sqlite_udf_relmin.txt +++ b/tests/test_plan_refsols/sqlite_udf_relmin.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('month', month), ('n_orders', n_rows), ('m1', m1), ('m2', m2), ('m3', m3)], orderings=[(month):asc_first]) - PROJECT(columns={'m1': RELMIN(args=[n_rows], partition=[], order=[]), 'm2': RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], cumulative=True), 'm3': RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], frame=(-1, 1)), 'month': month, 'n_rows': n_rows}) - AGGREGATE(keys={'month': month}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate)}) - FILTER(condition=YEAR(o_orderdate) == 
1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('month', month), ('n_orders', n_rows), ('m1', RELMIN(args=[n_rows], partition=[], order=[])), ('m2', RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], cumulative=True)), ('m3', RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], frame=(-1, 1)))], orderings=[(month):asc_first]) + AGGREGATE(keys={'month': month}, aggregations={'n_rows': COUNT()}) + PROJECT(columns={'month': MONTH(o_orderdate)}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index f63e075d7..54d45c7b6 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -7,12 +7,10 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows_1, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey_1, 'sum_l_quantity': t0.sum_l_quantity}) - PROJECT(columns={'n_rows_1': n_rows, 'ps_partkey_1': ps_partkey, 'ps_suppkey_1': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) - 
JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_rows_1': n_rows, 'sum_l_quantity': sum_l_quantity}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git 
a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index a37b6f7bc..6a203c579 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('supplier_name', s_name_1), ('nation_name', n_name_1), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', national_qty_pct_1)], orderings=[(national_qty_pct_1):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name_1': n_name, 'national_qty_pct_1': national_qty_pct, 's_name_1': s_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(national_qty_pct):desc_last]) +ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', national_qty_pct)], orderings=[(national_qty_pct):desc_last]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'national_qty_pct': national_qty_pct, 's_name': s_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(national_qty_pct):desc_last]) PROJECT(columns={'n_name': n_name, 'national_qty_pct': 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]), 's_name': s_name, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) diff --git a/tests/test_plan_refsols/suppliers_bal_diffs.txt b/tests/test_plan_refsols/suppliers_bal_diffs.txt index 916816a00..e95879b55 100644 --- a/tests/test_plan_refsols/suppliers_bal_diffs.txt 
+++ b/tests/test_plan_refsols/suppliers_bal_diffs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', s_name), ('region_name', r_name), ('acctbal_delta', acctbal_delta)], orderings=[(acctbal_delta):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'acctbal_delta': acctbal_delta, 'r_name': r_name_1, 's_name': s_name_1}, orderings=[(acctbal_delta):desc_last]) - PROJECT(columns={'acctbal_delta': s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]), 'r_name_1': r_name, 's_name_1': s_name}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'acctbal_delta': acctbal_delta, 'r_name': r_name, 's_name': s_name}, orderings=[(acctbal_delta):desc_last]) + PROJECT(columns={'acctbal_delta': s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]), 'r_name': r_name, 's_name': s_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index 0d29adc03..a380dffc1 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,6 +1,6 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', purchase_country), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name_1, 'ir': ir, 'purchase_country': name_2}, orderings=[(ir):desc_last]) - PROJECT(columns={'co_name_1': 
co_name, 'ir': ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'name_2': name_2}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name, 'ir': ir, 'purchase_country': purchase_country}, orderings=[(ir):desc_last]) + PROJECT(columns={'co_name': co_name, 'ir': ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'purchase_country': name_2}) JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index baa9bd355..79a611d7c 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -8,9 +8,8 @@ ROOT(columns=[('year', release_year), ('ir', ROUND(DEFAULT_TO(n_rows, 0:numeric) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) PROJECT(columns={'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_id_1 == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) - PROJECT(columns={'de_id_1': de_id, 'pr_release': pr_release}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, 
columns={'pr_id': pr_id, 'pr_release': pr_release}) + JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index e46df112f..6f30065aa 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,22 +1,21 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows_1, 'month': t0.month_1, 'n_rows': t1.n_rows, 'year': t0.year_1}) - PROJECT(columns={'ca_dt_1': ca_dt, 'month_1': month, 'n_rows_1': n_rows, 'year_1': year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'month': month, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - 
JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'month': month, 'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': 
t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index e2c60b089..df4147ff6 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,6 +1,6 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand_1, 'pr_name': pr_name_1, 'pr_type': pr_type_1}, orderings=[(ir):desc_last]) - PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand_1': pr_brand, 'pr_name_1': pr_name, 'pr_type_1': pr_type}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand, 'pr_name': pr_name, 
'pr_type': pr_type}, orderings=[(ir):desc_last]) + PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}) JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 3734ddf0e..8eac1f4c8 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -4,21 +4,20 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROU AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_4': expr_4, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) 
- AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + SCAN(table=main.CALENDAR, 
columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index c00cf0803..25ad10b87 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,16 +1,15 @@ ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(year):asc_first]) FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'n_incidents': n_incidents, 'year': year}) PROJECT(columns={'n_devices': 
DEFAULT_TO(sum_expr_3, 0:numeric), 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_3': expr_3, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.ca_dt == 
DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index 4ab824798..f92f003ec 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,15 +1,14 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', REVENUE), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal_1, 'c_address': c_address_1, 'c_comment': c_comment_1, 'c_custkey': c_custkey, 'c_name': c_name_1, 'c_phone': c_phone_1, 'n_name': n_name_1}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal_1': c_acctbal, 'c_address_1': c_address, 'c_comment_1': c_comment, 'c_custkey': c_custkey, 'c_name_1': c_name, 'c_phone_1': c_phone, 'n_name_1': n_name}) - JOIN(condition=t0.c_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal_1, 'c_address': t0.c_address_1, 'c_comment': t0.c_comment_1, 'c_custkey': t0.c_custkey_1, 'c_name': t0.c_name_1, 'c_phone': t0.c_phone_1, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) - PROJECT(columns={'c_acctbal_1': c_acctbal, 'c_address_1': c_address, 'c_comment_1': c_comment, 'c_custkey_1': c_custkey, 'c_name_1': c_name, 'c_nationkey_1': c_nationkey, 'c_phone_1': c_phone, 'sum_expr_1': sum_expr_1}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 
'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) + LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) + PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': 
t0.sum_expr_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index 06c5ad1a2..bbad37875 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -6,10 +6,9 @@ ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, 
columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_phone': s_phone, 's_suppkey': s_suppkey}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 
3baa9fc80..6f4efa2d8 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -9,12 +9,11 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) PROJECT(columns={'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric), 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'p_partkey_1': p_partkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), 
columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index 99e4bd091..e8f41b9b7 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,19 +1,18 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(DEFAULT_TO(sum_value, 0:numeric)):desc_last]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey_1}) - PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name': n_name, 'n_nationkey_1': n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 
'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/various_aggfuncs_simple.txt b/tests/test_plan_refsols/various_aggfuncs_simple.txt index b4f612231..44a91ea4a 100644 --- a/tests/test_plan_refsols/various_aggfuncs_simple.txt +++ b/tests/test_plan_refsols/various_aggfuncs_simple.txt @@ -1,6 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', DEFAULT_TO(avg_c_acctbal, 0:numeric)), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': 
t1.sum_c_acctbal_1}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'avg_c_acctbal': avg_c_acctbal, 'c_nationkey': c_nationkey, 'count_c_acctbal_1': count_c_acctbal, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows_1': n_rows, 'sum_c_acctbal_1': sum_c_acctbal}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal': COUNT(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal': COUNT(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsize.txt b/tests/test_plan_refsols/window_sliding_frame_relsize.txt index c85511eca..a367c4443 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsize.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsize.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId_1, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 
'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId_1': sbTxId, 'w1': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w2': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w3': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w6': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w7': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)), 'w8': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))}) + LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) + PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w2': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w3': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w6': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w7': 
RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)), 'w8': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))}) JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsum.txt b/tests/test_plan_refsols/window_sliding_frame_relsum.txt index ac1149b97..80af8f609 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsum.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsum.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId_1, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId_1': sbTxId, 'w1': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w2': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w3': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w6': RELSUM(args=[sbTxShares], partition=[sbTxCustId], 
order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w7': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)), 'w8': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))}) + LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) + PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w2': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w3': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w6': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w7': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)), 'w8': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))}) JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) diff --git 
a/tests/test_plan_refsols/years_months_days_hours_datediff.txt b/tests/test_plan_refsols/years_months_days_hours_datediff.txt index f7f1686d0..6a736c481 100644 --- a/tests/test_plan_refsols/years_months_days_hours_datediff.txt +++ b/tests/test_plan_refsols/years_months_days_hours_datediff.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('x', sbTxDateTime), ('y1', datetime.datetime(2025, 5, 2, 11, 0):datetime), ('years_diff', years_diff_1), ('c_years_diff', DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_y_diff', DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('y_diff', DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('months_diff', DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_months_diff', DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('mm_diff', DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('days_diff', DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_days_diff', DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_d_diff', DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('d_diff', DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('hours_diff', DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_hours_diff', DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_h_diff', DATEDIFF('H':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime))], orderings=[(years_diff_1):asc_first]) - LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'years_diff_1': years_diff}, orderings=[(years_diff):asc_first]) +ROOT(columns=[('x', sbTxDateTime), ('y1', 
datetime.datetime(2025, 5, 2, 11, 0):datetime), ('years_diff', years_diff), ('c_years_diff', DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_y_diff', DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('y_diff', DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('months_diff', DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_months_diff', DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('mm_diff', DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('days_diff', DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_days_diff', DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_d_diff', DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('d_diff', DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('hours_diff', DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_hours_diff', DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_h_diff', DATEDIFF('H':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime))], orderings=[(years_diff):asc_first]) + LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'years_diff': years_diff}, orderings=[(years_diff):asc_first]) PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'years_diff': DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)}) FILTER(condition=YEAR(sbTxDateTime) < 2025:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git a/tests/test_sql_refsols/datediff_ansi.sql b/tests/test_sql_refsols/datediff_ansi.sql index 
aa7347c30..7b2fcb32f 100644 --- a/tests/test_sql_refsols/datediff_ansi.sql +++ b/tests/test_sql_refsols/datediff_ansi.sql @@ -1,19 +1,19 @@ WITH _t0 AS ( SELECT - DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff_1, - sbtxdatetime + sbtxdatetime, + DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff FROM main.sbtransaction WHERE EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) < 2025 ORDER BY - DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) + years_diff LIMIT 30 ) SELECT sbtxdatetime AS x, CAST('2025-05-02 11:00:00' AS TIMESTAMP) AS y1, CAST('2023-04-03 13:16:30' AS TIMESTAMP) AS y, - years_diff_1 AS years_diff, + years_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), MONTH) AS months_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), DAY) AS days_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), HOUR) AS hours_diff, @@ -21,4 +21,4 @@ SELECT DATEDIFF(CAST('2023-04-03 13:16:30' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), SECOND) AS seconds_diff FROM _t0 ORDER BY - years_diff_1 + years_diff diff --git a/tests/test_sql_refsols/datediff_sqlite.sql b/tests/test_sql_refsols/datediff_sqlite.sql index 734f72ca9..4db97a7d2 100644 --- a/tests/test_sql_refsols/datediff_sqlite.sql +++ b/tests/test_sql_refsols/datediff_sqlite.sql @@ -1,19 +1,19 @@ WITH _t0 AS ( SELECT - CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff_1, - sbtxdatetime + sbtxdatetime, + CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff FROM main.sbtransaction WHERE CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) < 2025 ORDER BY - CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) 
AS INTEGER) + years_diff LIMIT 30 ) SELECT sbtxdatetime AS x, '2025-05-02 11:00:00' AS y1, '2023-04-03 13:16:30' AS y, - years_diff_1 AS years_diff, + years_diff, ( CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) ) * 12 + CAST(STRFTIME('%m', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%m', sbtxdatetime) AS INTEGER) AS months_diff, @@ -37,4 +37,4 @@ SELECT ) * 60 + CAST(STRFTIME('%S', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%S', sbtxdatetime) AS INTEGER) AS seconds_diff FROM _t0 ORDER BY - years_diff_1 + years_diff diff --git a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql index bb8ea4609..81e55dbda 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql @@ -8,20 +8,20 @@ WITH _s1 AS ( sbtxtickerid ), _t0 AS ( SELECT - sbticker.sbtickersymbol AS sbtickersymbol_1, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount_1, - _s1.n_rows + _s1.n_rows, + sbticker.sbtickersymbol, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - COALESCE(_s1.sum_sbtxamount, 0) DESC + total_amount DESC LIMIT 10 ) SELECT - sbtickersymbol_1 AS symbol, + sbtickersymbol AS symbol, COALESCE(n_rows, 0) AS num_transactions, - total_amount_1 AS total_amount + total_amount FROM _t0 ORDER BY - total_amount_1 DESC + total_amount DESC diff --git a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql index bb8ea4609..81e55dbda 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql @@ -8,20 +8,20 @@ WITH _s1 AS ( sbtxtickerid ), _t0 AS ( SELECT - sbticker.sbtickersymbol AS sbtickersymbol_1, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount_1, - _s1.n_rows + _s1.n_rows, + 
sbticker.sbtickersymbol, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - COALESCE(_s1.sum_sbtxamount, 0) DESC + total_amount DESC LIMIT 10 ) SELECT - sbtickersymbol_1 AS symbol, + sbtickersymbol AS symbol, COALESCE(n_rows, 0) AS num_transactions, - total_amount_1 AS total_amount + total_amount FROM _t0 ORDER BY - total_amount_1 DESC + total_amount DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql index 7a80c3684..7bdd812ac 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql @@ -10,22 +10,22 @@ WITH _s1 AS ( salesperson_id ), _t0 AS ( SELECT - salespersons.first_name AS first_name_1, - salespersons.last_name AS last_name_1, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, - _s1.n_rows + salespersons.first_name, + salespersons.last_name, + _s1.n_rows, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons LEFT JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - COALESCE(_s1.sum_sale_price, 0) DESC + total_revenue DESC LIMIT 3 ) SELECT - first_name_1 AS first_name, - last_name_1 AS last_name, + first_name, + last_name, COALESCE(n_rows, 0) AS total_sales, - total_revenue_1 AS total_revenue + total_revenue FROM _t0 ORDER BY - total_revenue_1 DESC + total_revenue DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql index 103007c2c..280dd33d8 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql @@ -10,22 +10,22 @@ WITH _s1 AS ( salesperson_id ), _t0 AS ( SELECT - salespersons.first_name AS first_name_1, - salespersons.last_name AS last_name_1, - COALESCE(_s1.sum_sale_price, 0) AS 
total_revenue_1, - _s1.n_rows + salespersons.first_name, + salespersons.last_name, + _s1.n_rows, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons LEFT JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - COALESCE(_s1.sum_sale_price, 0) DESC + total_revenue DESC LIMIT 3 ) SELECT - first_name_1 AS first_name, - last_name_1 AS last_name, + first_name, + last_name, COALESCE(n_rows, 0) AS total_sales, - total_revenue_1 AS total_revenue + total_revenue FROM _t0 ORDER BY - total_revenue_1 DESC + total_revenue DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql index 16ac04eee..3134aaf0d 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS n_rows_1, + COUNT(*) AS n_rows, SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales @@ -10,9 +10,9 @@ WITH _s1 AS ( salesperson_id ), _t0 AS ( SELECT - salespersons.first_name AS first_name_1, - salespersons.last_name AS last_name_1, - _s1.n_rows_1 AS n_rows, + salespersons.first_name, + salespersons.last_name, + _s1.n_rows, _s1.sum_sale_price FROM main.salespersons AS salespersons JOIN _s1 AS _s1 @@ -22,8 +22,8 @@ WITH _s1 AS ( LIMIT 5 ) SELECT - first_name_1 AS first_name, - last_name_1 AS last_name, + first_name, + last_name, n_rows AS total_sales, COALESCE(sum_sale_price, 0) AS total_revenue FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql index b06f6bb94..4a046fb86 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS n_rows_1, + COUNT(*) AS n_rows, SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales @@ -12,9 +12,9 @@ WITH _s1 
AS ( salesperson_id ), _t0 AS ( SELECT - salespersons.first_name AS first_name_1, - salespersons.last_name AS last_name_1, - _s1.n_rows_1 AS n_rows, + salespersons.first_name, + salespersons.last_name, + _s1.n_rows, _s1.sum_sale_price FROM main.salespersons AS salespersons JOIN _s1 AS _s1 @@ -24,8 +24,8 @@ WITH _s1 AS ( LIMIT 5 ) SELECT - first_name_1 AS first_name, - last_name_1 AS last_name, + first_name, + last_name, n_rows AS total_sales, COALESCE(sum_sale_price, 0) AS total_revenue FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql index 3ef87cc68..fe9f62139 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql @@ -8,22 +8,22 @@ WITH _s1 AS ( car_id ), _t0 AS ( SELECT - cars.make AS make_1, - cars.model AS model_1, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, - _s1.n_rows + cars.make, + cars.model, + _s1.n_rows, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id ORDER BY - COALESCE(_s1.sum_sale_price, 0) DESC + total_revenue DESC LIMIT 5 ) SELECT - make_1 AS make, - model_1 AS model, + make, + model, COALESCE(n_rows, 0) AS total_sales, - total_revenue_1 AS total_revenue + total_revenue FROM _t0 ORDER BY - total_revenue_1 DESC + total_revenue DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql index 3ef87cc68..fe9f62139 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql @@ -8,22 +8,22 @@ WITH _s1 AS ( car_id ), _t0 AS ( SELECT - cars.make AS make_1, - cars.model AS model_1, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, - _s1.n_rows + cars.make, + cars.model, + _s1.n_rows, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.cars AS cars LEFT 
JOIN _s1 AS _s1 ON _s1.car_id = cars._id ORDER BY - COALESCE(_s1.sum_sale_price, 0) DESC + total_revenue DESC LIMIT 5 ) SELECT - make_1 AS make, - model_1 AS model, + make, + model, COALESCE(n_rows, 0) AS total_sales, - total_revenue_1 AS total_revenue + total_revenue FROM _t0 ORDER BY - total_revenue_1 DESC + total_revenue DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 4e4bdb420..5e675ed08 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -11,20 +11,20 @@ WITH _s1 AS ( receiver_id ), _t0 AS ( SELECT - merchants.name AS name_1, - COALESCE(_s1.sum_amount, 0) AS total_amount_1, - _s1.n_rows + _s1.n_rows, + merchants.name, + COALESCE(_s1.sum_amount, 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid ORDER BY - COALESCE(_s1.sum_amount, 0) DESC + total_amount DESC LIMIT 2 ) SELECT - name_1 AS merchant_name, + name AS merchant_name, COALESCE(n_rows, 0) AS total_transactions, - total_amount_1 AS total_amount + total_amount FROM _t0 ORDER BY - total_amount_1 DESC + total_amount DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index ed3cfe9ea..cd313570f 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -11,20 +11,20 @@ WITH _s1 AS ( receiver_id ), _t0 AS ( SELECT - merchants.name AS name_1, - COALESCE(_s1.sum_amount, 0) AS total_amount_1, - _s1.n_rows + _s1.n_rows, + merchants.name, + COALESCE(_s1.sum_amount, 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid ORDER BY - COALESCE(_s1.sum_amount, 0) DESC + total_amount DESC LIMIT 2 ) SELECT - name_1 AS merchant_name, + name AS merchant_name, COALESCE(n_rows, 0) AS total_transactions, - 
total_amount_1 AS total_amount + total_amount FROM _t0 ORDER BY - total_amount_1 DESC + total_amount DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index 672017439..313c5c222 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -8,20 +8,20 @@ WITH _s1 AS ( coupon_id ), _t0 AS ( SELECT - coupons.code AS code_1, - COALESCE(_s1.count_txid, 0) AS redemption_count_1, + coupons.code, + COALESCE(_s1.count_txid, 0) AS redemption_count, _s1.sum_amount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid ORDER BY - COALESCE(_s1.count_txid, 0) DESC + redemption_count DESC LIMIT 3 ) SELECT - code_1 AS coupon_code, - redemption_count_1 AS redemption_count, + code AS coupon_code, + redemption_count, COALESCE(sum_amount, 0) AS total_discount FROM _t0 ORDER BY - redemption_count_1 DESC + redemption_count DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index 672017439..313c5c222 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -8,20 +8,20 @@ WITH _s1 AS ( coupon_id ), _t0 AS ( SELECT - coupons.code AS code_1, - COALESCE(_s1.count_txid, 0) AS redemption_count_1, + coupons.code, + COALESCE(_s1.count_txid, 0) AS redemption_count, _s1.sum_amount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid ORDER BY - COALESCE(_s1.count_txid, 0) DESC + redemption_count DESC LIMIT 3 ) SELECT - code_1 AS coupon_code, - redemption_count_1 AS redemption_count, + code AS coupon_code, + redemption_count, COALESCE(sum_amount, 0) AS total_discount FROM _t0 ORDER BY - redemption_count_1 DESC + redemption_count DESC diff --git a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql 
b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql index abf9e60c6..5d9adaf39 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql +++ b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql @@ -5,11 +5,11 @@ WITH _s2 AS ( FROM events ), _t0 AS ( SELECT - eras.er_name AS er_name_1, - events.ev_name AS ev_name_1, - seasons.s_name AS s_name_1, - times.t_name AS t_name_1, - events.ev_dt + eras.er_name, + events.ev_dt, + events.ev_name, + seasons.s_name, + times.t_name FROM events AS events JOIN eras AS eras ON eras.er_end_year > EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) @@ -32,11 +32,11 @@ WITH _s2 AS ( LIMIT 6 ) SELECT - ev_name_1 AS event_name, - er_name_1 AS era_name, + ev_name AS event_name, + er_name AS era_name, EXTRACT(YEAR FROM CAST(ev_dt AS DATETIME)) AS event_year, - s_name_1 AS season_name, - t_name_1 AS tod + s_name AS season_name, + t_name AS tod FROM _t0 ORDER BY ev_dt diff --git a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql index 972bff76a..1cbe48ccc 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql +++ b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql @@ -5,11 +5,11 @@ WITH _s2 AS ( FROM events ), _t0 AS ( SELECT - eras.er_name AS er_name_1, - events.ev_name AS ev_name_1, - seasons.s_name AS s_name_1, - times.t_name AS t_name_1, - events.ev_dt + eras.er_name, + events.ev_dt, + events.ev_name, + seasons.s_name, + times.t_name FROM events AS events JOIN eras AS eras ON eras.er_end_year > CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) @@ -32,11 +32,11 @@ WITH _s2 AS ( LIMIT 6 ) SELECT - ev_name_1 AS event_name, - er_name_1 AS era_name, + ev_name AS event_name, + er_name AS era_name, CAST(STRFTIME('%Y', ev_dt) AS INTEGER) AS event_year, - s_name_1 AS season_name, - t_name_1 AS tod + s_name AS season_name, + t_name AS tod FROM _t0 ORDER BY ev_dt diff --git a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql 
b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql index 2d82d40ff..30ed1ca1b 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql @@ -1,19 +1,19 @@ WITH _t0 AS ( SELECT - CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost_1, ps_availqty, ps_partkey, - ps_suppkey + ps_suppkey, + CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost FROM tpch.partsupp ORDER BY - CEIL(ps_supplycost * FLOOR(ps_availqty)) DESC + total_cost DESC LIMIT 10 ) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, FLOOR(ps_availqty) AS complete_parts, - total_cost_1 AS total_cost + total_cost FROM _t0 ORDER BY - total_cost_1 DESC + total_cost DESC diff --git a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql index 251ef1ebf..95f227be8 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql @@ -1,21 +1,8 @@ WITH _t0 AS ( SELECT - CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) + CASE - WHEN CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) < ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) - THEN 1 - ELSE 0 - END AS total_cost_1, ps_availqty, ps_partkey, - ps_suppkey - FROM tpch.partsupp - ORDER BY + ps_suppkey, CAST(ps_supplycost * ( CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END ) AS INTEGER) + CASE @@ -26,14 +13,17 @@ WITH _t0 AS ( ) THEN 1 ELSE 0 - END DESC + END AS total_cost + FROM tpch.partsupp + ORDER BY + total_cost DESC LIMIT 10 ) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 
ELSE 0 END AS complete_parts, - total_cost_1 AS total_cost + total_cost FROM _t0 ORDER BY - total_cost_1 DESC + total_cost DESC diff --git a/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql index 68e1f77a1..28cbbc248 100644 --- a/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t2 AS ( SELECT AVG(customer.c_acctbal) OVER (PARTITION BY nation.n_regionkey) AS avg_balance, customer.c_acctbal, @@ -8,9 +8,9 @@ WITH _t3 AS ( ON customer.c_nationkey = nation.n_nationkey ), _s3 AS ( SELECT - COALESCE(COUNT(*), 0) AS n_cust, + COUNT(*) AS n_rows, n_regionkey - FROM _t3 + FROM _t2 WHERE ABS(avg_balance - c_acctbal) <= avg_balance * 0.1 GROUP BY @@ -18,7 +18,7 @@ WITH _t3 AS ( ) SELECT region.r_name AS name, - _s3.n_cust + COALESCE(_s3.n_rows, 0) AS n_cust FROM tpch.region AS region JOIN _s3 AS _s3 ON _s3.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql index b4a7156ba..b6fe25cdb 100644 --- a/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql @@ -1,3 +1,14 @@ +WITH _t0 AS ( + SELECT + o_orderkey, + o_orderpriority + FROM tpch.orders + WHERE + o_clerk = 'Clerk#000000951' + ORDER BY + o_orderkey + LIMIT 10 +) SELECT o_orderkey AS key, CASE @@ -9,9 +20,6 @@ SELECT THEN 'C' ELSE 'D' END AS val -FROM tpch.orders -WHERE - o_clerk = 'Clerk#000000951' +FROM _t0 ORDER BY o_orderkey -LIMIT 10 diff --git a/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql index df8c90691..7d81bf0ec 100644 --- a/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql @@ -1,10 +1,19 @@ +WITH _t0 AS ( + SELECT + 
o_orderdate, + o_orderkey, + o_totalprice + FROM tpch.orders + ORDER BY + o_totalprice + LIMIT 5 +) SELECT o_orderkey AS key, STRFTIME('%d/%m/%Y', o_orderdate) AS d1, STRFTIME('%Y:%j', o_orderdate) AS d2, CAST(STRFTIME('%s', o_orderdate) AS INTEGER) AS d3, CAST(STRFTIME('%Y%m%d', o_orderdate, '+39 days', 'start of month') AS INTEGER) AS d4 -FROM tpch.orders +FROM _t0 ORDER BY o_totalprice -LIMIT 5 diff --git a/tests/test_sql_refsols/sqlite_udf_gcat_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_gcat_sqlite.sql index 4e168aa30..a7246cb29 100644 --- a/tests/test_sql_refsols/sqlite_udf_gcat_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_gcat_sqlite.sql @@ -1,16 +1,8 @@ -WITH _t0 AS ( - SELECT - GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS c1, - GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS c2, - GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS c3, - r_name - FROM tpch.region -) SELECT r_name AS name, - c1, - c2, - c3 -FROM _t0 + GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS c1, + GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS c2, + GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS c3 +FROM tpch.region ORDER BY r_name diff --git a/tests/test_sql_refsols/sqlite_udf_nval_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_nval_sqlite.sql index 0d0076c57..9a4baf571 100644 --- a/tests/test_sql_refsols/sqlite_udf_nval_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_nval_sqlite.sql @@ -1,23 +1,13 @@ -WITH _t0 AS ( - SELECT - NTH_VALUE(nation.n_name, 3) OVER (ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS v1, - NTH_VALUE(nation.n_name, 1) OVER (PARTITION BY nation.n_regionkey 
ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS v2, - NTH_VALUE(nation.n_name, 2) OVER (PARTITION BY nation.n_regionkey ORDER BY nation.n_name ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS v3, - NTH_VALUE(nation.n_name, 5) OVER (ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS v4, - nation.n_name, - region.r_name - FROM tpch.region AS region - JOIN tpch.nation AS nation - ON nation.n_regionkey = region.r_regionkey -) SELECT - r_name AS rname, - n_name AS nname, - v1, - v2, - v3, - v4 -FROM _t0 + region.r_name AS rname, + nation.n_name AS nname, + NTH_VALUE(nation.n_name, 3) OVER (ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS v1, + NTH_VALUE(nation.n_name, 1) OVER (PARTITION BY nation.n_regionkey ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS v2, + NTH_VALUE(nation.n_name, 2) OVER (PARTITION BY nation.n_regionkey ORDER BY nation.n_name ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS v3, + NTH_VALUE(nation.n_name, 5) OVER (ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS v4 +FROM tpch.region AS region +JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey ORDER BY - r_name, - n_name + region.r_name, + nation.n_name diff --git a/tests/test_sql_refsols/sqlite_udf_percent_positive_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_percent_positive_sqlite.sql index 4dbef9cea..c4ac974c1 100644 --- a/tests/test_sql_refsols/sqlite_udf_percent_positive_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_percent_positive_sqlite.sql @@ -5,12 +5,9 @@ WITH _s0 AS ( FROM tpch.nation ), _s3 AS ( SELECT - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN customer.c_acctbal > 0 THEN 1 END) - ) AS REAL) / COUNT(*), - 2 - ) AS pct_cust_positive, + CAST(( + 100.0 * SUM(CASE WHEN customer.c_acctbal > 0 THEN 1 END) + ) AS REAL) / COUNT(*) AS percentage_expr_2, _s0.n_regionkey FROM _s0 AS _s0 JOIN tpch.customer 
AS customer @@ -19,12 +16,9 @@ WITH _s0 AS ( _s0.n_regionkey ), _s7 AS ( SELECT - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN supplier.s_acctbal > 0 THEN 1 END) - ) AS REAL) / COUNT(*), - 2 - ) AS pct_supp_positive, + CAST(( + 100.0 * SUM(CASE WHEN supplier.s_acctbal > 0 THEN 1 END) + ) AS REAL) / COUNT(*) AS percentage_expr_3, _s4.n_regionkey FROM _s0 AS _s4 JOIN tpch.supplier AS supplier @@ -34,8 +28,8 @@ WITH _s0 AS ( ) SELECT region.r_name AS name, - _s3.pct_cust_positive, - _s7.pct_supp_positive + ROUND(_s3.percentage_expr_2, 2) AS pct_cust_positive, + ROUND(_s7.percentage_expr_3, 2) AS pct_supp_positive FROM tpch.region AS region JOIN _s3 AS _s3 ON _s3.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 52747c6e5..64803c51d 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -9,7 +9,7 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( SELECT - part.p_partkey AS p_partkey_1, + part.p_partkey, _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 @@ -22,7 +22,7 @@ WITH _s3 AS ( partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 - ON _s5.p_partkey_1 = partsupp.ps_partkey + ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index c0f053dfc..e5b221d69 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -9,7 +9,7 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( SELECT - part.p_partkey AS p_partkey_1, + part.p_partkey, _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 @@ -22,7 +22,7 @@ WITH _s3 AS ( partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 - ON _s5.p_partkey_1 = partsupp.ps_partkey + ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * 
COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) From d88cdb51c4ee0a25b5107155c601c7c54803a061 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 03:41:32 -0400 Subject: [PATCH 014/143] Compressing limit into root --- pydough/conversion/column_bubbler.py | 2 +- pydough/conversion/merge_projects.py | 26 +++++++- pydough/relational/relational_nodes/limit.py | 2 +- .../relational_nodes/relational_root.py | 23 +++++-- pydough/sqlglot/sqlglot_relational_visitor.py | 5 ++ .../aggregation_analytics_1.txt | 44 ++++++------- .../aggregation_analytics_2.txt | 32 +++++---- .../aggregation_analytics_3.txt | 32 +++++---- .../avg_order_diff_per_customer.txt | 21 +++--- tests/test_plan_refsols/bad_child_reuse_1.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 19 +++--- tests/test_plan_refsols/bad_child_reuse_3.txt | 19 +++--- tests/test_plan_refsols/bad_child_reuse_4.txt | 17 +++-- tests/test_plan_refsols/bad_child_reuse_5.txt | 2 +- tests/test_plan_refsols/common_prefix_aa.txt | 15 ++--- tests/test_plan_refsols/common_prefix_al.txt | 2 +- tests/test_plan_refsols/common_prefix_am.txt | 2 +- tests/test_plan_refsols/common_prefix_an.txt | 2 +- tests/test_plan_refsols/common_prefix_ao.txt | 53 ++++++++------- tests/test_plan_refsols/common_prefix_i.txt | 21 +++--- tests/test_plan_refsols/common_prefix_j.txt | 13 ++-- tests/test_plan_refsols/common_prefix_k.txt | 13 ++-- tests/test_plan_refsols/common_prefix_l.txt | 35 +++++----- tests/test_plan_refsols/common_prefix_m.txt | 35 +++++----- tests/test_plan_refsols/common_prefix_n.txt | 51 +++++++------- tests/test_plan_refsols/common_prefix_o.txt | 55 ++++++++-------- tests/test_plan_refsols/common_prefix_p.txt | 28 ++++---- tests/test_plan_refsols/common_prefix_q.txt | 30 ++++----- tests/test_plan_refsols/common_prefix_r.txt | 36 +++++----- tests/test_plan_refsols/common_prefix_t.txt | 26 ++++---- tests/test_plan_refsols/common_prefix_u.txt | 30 ++++----- tests/test_plan_refsols/common_prefix_v.txt | 15 ++--- 
tests/test_plan_refsols/common_prefix_w.txt | 17 +++-- tests/test_plan_refsols/common_prefix_x.txt | 22 +++---- tests/test_plan_refsols/common_prefix_y.txt | 22 +++---- tests/test_plan_refsols/common_prefix_z.txt | 15 ++--- tests/test_plan_refsols/correl_19.txt | 17 +++-- tests/test_plan_refsols/correl_22.txt | 17 +++-- tests/test_plan_refsols/correl_25.txt | 41 ++++++------ tests/test_plan_refsols/correl_32.txt | 24 ++++--- ...count_at_most_100_suppliers_per_nation.txt | 2 +- .../customer_largest_order_deltas.txt | 30 ++++----- .../customer_most_recent_orders.txt | 14 ++-- tests/test_plan_refsols/datetime_relative.txt | 2 +- .../test_plan_refsols/deep_best_analysis.txt | 66 +++++++++---------- tests/test_plan_refsols/dumb_aggregation.txt | 2 +- .../epoch_culture_events_info.txt | 25 ++++--- ...ping_event_search_other_users_per_user.txt | 25 ++++--- ...ch_overlapping_event_searches_per_user.txt | 29 ++++---- .../epoch_users_most_cold_war_searches.txt | 23 ++++--- tests/test_plan_refsols/exponentiation.txt | 6 +- .../first_order_per_customer.txt | 13 ++-- tests/test_plan_refsols/floor_and_ceil_2.txt | 6 +- tests/test_plan_refsols/function_sampler.txt | 15 ++--- tests/test_plan_refsols/join_topk.txt | 9 ++- .../minutes_seconds_datediff.txt | 7 +- .../multi_partition_access_1.txt | 5 +- .../test_plan_refsols/order_by_expression.txt | 6 +- .../test_plan_refsols/order_quarter_test.txt | 7 +- .../orders_versus_first_orders.txt | 20 +++--- tests/test_plan_refsols/padding_functions.txt | 5 +- tests/test_plan_refsols/part_reduced_size.txt | 13 ++-- .../parts_quantity_increase_95_96.txt | 36 +++++----- .../quantile_function_test_2.txt | 2 +- .../quantile_function_test_3.txt | 2 +- .../quantile_function_test_4.txt | 2 +- .../rank_nations_per_region_by_customers.txt | 16 ++--- ...rank_parts_per_supplier_region_by_size.txt | 22 +++---- .../test_plan_refsols/rank_with_filters_c.txt | 2 +- tests/test_plan_refsols/sign.txt | 5 +- tests/test_plan_refsols/simple_cross_5.txt | 4 +- 
tests/test_plan_refsols/simple_cross_7.txt | 24 ++++--- tests/test_plan_refsols/simple_cross_9.txt | 17 +++-- .../simple_filter_top_five.txt | 7 +- .../simple_scan_top_five.txt | 5 +- tests/test_plan_refsols/simple_topk.txt | 5 +- tests/test_plan_refsols/singular3.txt | 2 +- tests/test_plan_refsols/singular4.txt | 15 ++--- tests/test_plan_refsols/singular5.txt | 27 ++++---- tests/test_plan_refsols/singular6.txt | 27 ++++---- tests/test_plan_refsols/singular7.txt | 29 ++++---- .../test_plan_refsols/sqlite_udf_decode3.txt | 7 +- .../sqlite_udf_format_datetime.txt | 5 +- .../test_plan_refsols/supplier_best_part.txt | 31 +++++---- .../supplier_pct_national_qty.txt | 32 +++++---- .../test_plan_refsols/suppliers_bal_diffs.txt | 14 ++-- ...ograph_battery_failure_rates_anomalies.txt | 30 ++++----- ...chnograph_country_combination_analysis.txt | 30 ++++----- .../technograph_hot_purchase_window.txt | 17 +++-- .../technograph_most_unreliable_products.txt | 24 ++++--- ...top_5_nations_balance_by_num_suppliers.txt | 11 ++-- .../top_5_nations_by_num_supplierss.txt | 11 ++-- .../top_customers_by_orders.txt | 12 ++-- tests/test_plan_refsols/topk_order_by.txt | 5 +- .../test_plan_refsols/topk_order_by_calc.txt | 5 +- .../topk_replace_order_by.txt | 5 +- .../topk_root_different_order_by.txt | 2 +- tests/test_plan_refsols/tpch_q10.txt | 26 ++++---- tests/test_plan_refsols/tpch_q11.txt | 41 ++++++------ tests/test_plan_refsols/tpch_q13.txt | 17 +++-- tests/test_plan_refsols/tpch_q16.txt | 19 +++--- tests/test_plan_refsols/tpch_q18.txt | 19 +++--- tests/test_plan_refsols/tpch_q2.txt | 27 ++++---- tests/test_plan_refsols/tpch_q20.txt | 37 +++++------ tests/test_plan_refsols/tpch_q21.txt | 48 +++++++------- tests/test_plan_refsols/tpch_q3.txt | 24 ++++--- tests/test_plan_refsols/tpch_q9.txt | 31 +++++---- .../window_sliding_frame_relsize.txt | 10 ++- .../window_sliding_frame_relsum.txt | 10 ++- .../year_month_nation_orders.txt | 25 ++++--- .../years_months_days_hours_datediff.txt | 8 +-- 
tests/test_sql_refsols/datediff_ansi.sql | 20 ++---- tests/test_sql_refsols/datediff_sqlite.sql | 20 ++---- .../defog_broker_adv10_ansi.sql | 2 +- .../defog_broker_adv10_sqlite.sql | 2 +- .../defog_broker_adv1_ansi.sql | 2 +- .../defog_broker_adv1_sqlite.sql | 2 +- .../defog_broker_adv2_ansi.sql | 2 +- .../defog_broker_adv2_sqlite.sql | 2 +- .../defog_broker_adv4_ansi.sql | 2 +- .../defog_broker_adv4_sqlite.sql | 2 +- .../defog_broker_basic3_ansi.sql | 24 +++---- .../defog_broker_basic3_sqlite.sql | 24 +++---- .../defog_broker_gen4_ansi.sql | 2 +- .../defog_broker_gen4_sqlite.sql | 2 +- .../defog_dealership_adv16_ansi.sql | 2 +- .../defog_dealership_adv16_sqlite.sql | 2 +- .../defog_dealership_basic10_ansi.sql | 27 +++----- .../defog_dealership_basic10_sqlite.sql | 27 +++----- .../defog_dealership_basic5_ansi.sql | 27 +++----- .../defog_dealership_basic5_sqlite.sql | 27 +++----- .../defog_dealership_basic6_ansi.sql | 2 +- .../defog_dealership_basic6_sqlite.sql | 2 +- .../defog_dealership_basic7_ansi.sql | 2 +- .../defog_dealership_basic7_sqlite.sql | 2 +- .../defog_dealership_basic8_ansi.sql | 27 +++----- .../defog_dealership_basic8_sqlite.sql | 27 +++----- .../defog_dealership_gen1_ansi.sql | 2 +- .../defog_dealership_gen1_sqlite.sql | 4 +- .../defog_ewallet_adv15_ansi.sql | 2 +- .../defog_ewallet_adv15_sqlite.sql | 2 +- .../defog_ewallet_basic10_ansi.sql | 24 +++---- .../defog_ewallet_basic10_sqlite.sql | 24 +++---- .../defog_ewallet_basic8_ansi.sql | 24 +++---- .../defog_ewallet_basic8_sqlite.sql | 24 +++---- .../defog_ewallet_basic9_ansi.sql | 2 +- .../defog_ewallet_basic9_sqlite.sql | 2 +- .../epoch_culture_events_info_ansi.sql | 58 +++++++--------- .../epoch_culture_events_info_sqlite.sql | 58 +++++++--------- .../floor_and_ceil_2_ansi.sql | 18 ++--- .../floor_and_ceil_2_sqlite.sql | 48 +++++++------- .../sqlite_udf_decode3_sqlite.sql | 16 ++--- .../sqlite_udf_format_datetime_sqlite.sql | 13 +--- ...h_battery_failure_rates_anomalies_ansi.sql | 2 +- 
...battery_failure_rates_anomalies_sqlite.sql | 2 +- ...raph_country_combination_analysis_ansi.sql | 4 +- ...ph_country_combination_analysis_sqlite.sql | 4 +- ...hnograph_most_unreliable_products_ansi.sql | 2 +- ...ograph_most_unreliable_products_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q10_ansi.sql | 2 +- tests/test_sql_refsols/tpch_q10_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q20_ansi.sql | 6 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 6 +- tests/test_sql_refsols/tpch_q21_ansi.sql | 46 ++++++------- tests/test_sql_refsols/tpch_q21_sqlite.sql | 50 +++++++------- tests/test_sql_refsols/tpch_q2_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q3_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q3_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q9_ansi.sql | 55 +++++++--------- tests/test_sql_refsols/tpch_q9_sqlite.sql | 55 +++++++--------- 170 files changed, 1324 insertions(+), 1574 deletions(-) diff --git a/pydough/conversion/column_bubbler.py b/pydough/conversion/column_bubbler.py index b09975d6d..d5d0bc131 100644 --- a/pydough/conversion/column_bubbler.py +++ b/pydough/conversion/column_bubbler.py @@ -333,4 +333,4 @@ def bubble_column_names(root: RelationalRoot) -> RelationalRoot: ordering.nulls_first, ) ) - return RelationalRoot(new_input, new_ordered_columns, new_orderings) + return RelationalRoot(new_input, new_ordered_columns, new_orderings, root.limit) diff --git a/pydough/conversion/merge_projects.py b/pydough/conversion/merge_projects.py index ca2d3f4d1..cc5eac798 100644 --- a/pydough/conversion/merge_projects.py +++ b/pydough/conversion/merge_projects.py @@ -229,7 +229,7 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: and any(contains_window(expr) for expr in node.columns.values()) ): # Replace all column references in the root's columns with - # the expressions from the child projection.. + # the expressions from the child projection. 
for idx, (name, expr) in enumerate(node.ordered_columns): new_expr = transpose_expression(expr, child_project.columns) node.columns[name] = new_expr @@ -290,6 +290,30 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: if key_name not in keys_used: new_columns[key_name] = node.input.columns[key_name] return node.input.copy(columns=new_columns) + # Alternatively: if the node is a root and it is on top of a limit, try to + # suck the limit into the root. + if isinstance(node, RelationalRoot) and isinstance(node.input, Limit): + new_orderings: list[ExpressionSortInfo] = [ + ExpressionSortInfo( + transpose_expression(ordering.expr, node.input.columns), + ordering.ascending, + ordering.nulls_first, + ) + for ordering in node.orderings + ] + if node.input.orderings == new_orderings: + # If the orderings are the same, pull in the limit into the root. + # Replace all column references in the root's columns with + # the expressions from the child projection. + for idx, (name, expr) in enumerate(node.ordered_columns): + new_expr = transpose_expression(expr, node.input.columns) + node.columns[name] = new_expr + node.ordered_columns[idx] = (name, new_expr) + node._orderings = new_orderings + node._limit = node.input.limit + # Delete the child projection from the tree, replacing it + # with its input. 
+ node._input = node.input.input return node diff --git a/pydough/relational/relational_nodes/limit.py b/pydough/relational/relational_nodes/limit.py index 9da2f2061..45311aee4 100644 --- a/pydough/relational/relational_nodes/limit.py +++ b/pydough/relational/relational_nodes/limit.py @@ -66,7 +66,7 @@ def to_string(self, compact: bool = False) -> str: orderings: list[str] = [ ordering.to_string(compact) for ordering in self.orderings ] - return f"LIMIT(limit={self.limit}, columns={self.make_column_string(self.columns, compact)}, orderings=[{', '.join(orderings)}])" + return f"LIMIT(limit={self.limit.to_string(compact)}, columns={self.make_column_string(self.columns, compact)}, orderings=[{', '.join(orderings)}])" def accept(self, visitor: "RelationalVisitor") -> None: # type: ignore # noqa return visitor.visit_limit(self) diff --git a/pydough/relational/relational_nodes/relational_root.py b/pydough/relational/relational_nodes/relational_root.py index 03a862552..48d7b4e7e 100644 --- a/pydough/relational/relational_nodes/relational_root.py +++ b/pydough/relational/relational_nodes/relational_root.py @@ -25,6 +25,7 @@ def __init__( input: RelationalNode, ordered_columns: list[tuple[str, RelationalExpression]], orderings: list[ExpressionSortInfo] | None = None, + limit: RelationalExpression | None = None, ) -> None: columns = dict(ordered_columns) assert len(columns) == len(ordered_columns), ( @@ -35,6 +36,7 @@ def __init__( self._orderings: list[ExpressionSortInfo] = ( [] if orderings is None else orderings ) + self._limit: RelationalExpression | None = limit @property def ordered_columns(self) -> list[tuple[str, RelationalExpression]]: @@ -51,6 +53,13 @@ def orderings(self) -> list[ExpressionSortInfo]: """ return self._orderings + @property + def limit(self) -> RelationalExpression | None: + """ + The limit on the number of rows in the final output, if any. 
+ """ + return self._limit + def node_equals(self, other: RelationalNode) -> bool: return ( isinstance(other, RelationalRoot) @@ -67,9 +76,13 @@ def to_string(self, compact: bool = False) -> str: orderings: list[str] = [ ordering.to_string(compact) for ordering in self.orderings ] - return ( - f"ROOT(columns=[{', '.join(columns)}], orderings=[{', '.join(orderings)}])" - ) + kwargs: list[tuple[str, str]] = [ + ("columns", f"[{', '.join(columns)}]"), + ("orderings", f"[{', '.join(orderings)}]"), + ] + if self.limit is not None: + kwargs.append(("limit", self.limit.to_string(compact))) + return f"ROOT({', '.join(f'{k}={v}' for k, v in kwargs)})" def accept(self, visitor: "RelationalVisitor") -> None: # type: ignore # noqa visitor.visit_root(self) @@ -81,4 +94,6 @@ def node_copy( ) -> RelationalNode: assert len(inputs) == 1, "Root node should have exactly one input" assert columns == self.columns, "Root columns should not be modified" - return RelationalRoot(inputs[0], self.ordered_columns, self.orderings) + return RelationalRoot( + inputs[0], self.ordered_columns, self.orderings, self.limit + ) diff --git a/pydough/sqlglot/sqlglot_relational_visitor.py b/pydough/sqlglot/sqlglot_relational_visitor.py index 5db263795..53fb7d288 100644 --- a/pydough/sqlglot/sqlglot_relational_visitor.py +++ b/pydough/sqlglot/sqlglot_relational_visitor.py @@ -565,6 +565,11 @@ def visit_root(self, root: RelationalRoot) -> None: query = self._build_subquery(input_expr, exprs, sort=False) if ordering_exprs: query = query.order_by(*ordering_exprs) + if root.limit is not None: + limit_expr: SQLGlotExpression = self._expr_visitor.relational_to_sqlglot( + root.limit + ) + query = query.limit(limit_expr) self._stack.append(query) def relational_to_sqlglot(self, root: RelationalRoot) -> SQLGlotExpression: diff --git a/tests/test_plan_refsols/aggregation_analytics_1.txt b/tests/test_plan_refsols/aggregation_analytics_1.txt index 993f46ef8..f83dbaec8 100644 --- 
a/tests/test_plan_refsols/aggregation_analytics_1.txt +++ b/tests/test_plan_refsols/aggregation_analytics_1.txt @@ -1,23 +1,21 @@ -ROOT(columns=[('part_name', p_name), ('revenue_generated', revenue_generated)], orderings=[(revenue_generated):asc_first, (p_name):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'p_name': p_name, 'revenue_generated': revenue_generated}, orderings=[(revenue_generated):asc_first, (p_name):asc_first]) - PROJECT(columns={'p_name': p_name, 'revenue_generated': ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)}) - JOIN(condition=t0.ps_partkey == t1.ps_partkey & t0.ps_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'p_name': t0.p_name, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': 
t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) +ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=8:numeric) + JOIN(condition=t0.ps_partkey == t1.ps_partkey & t0.ps_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'p_name': t0.p_name, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(revenue)}) + PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) + 
SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index a3fc5b678..2be73d9d4 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,17 +1,15 @@ -ROOT(columns=[('part_name', p_name), ('revenue_generated', revenue_generated)], orderings=[(revenue_generated):asc_first, (p_name):asc_first]) - LIMIT(limit=Literal(value=4, type=NumericType()), columns={'p_name': p_name, 'revenue_generated': revenue_generated}, orderings=[(revenue_generated):asc_first, (p_name):asc_first]) - PROJECT(columns={'p_name': p_name, 'revenue_generated': ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)}) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == 
t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 
'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 8071a16f2..0945b2982 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,17 +1,15 @@ -ROOT(columns=[('part_name', p_name), ('revenue_ratio', revenue_ratio)], orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'p_name': p_name, 'revenue_ratio': revenue_ratio}, orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) - PROJECT(columns={'p_name': p_name, 'revenue_ratio': ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)}) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 
1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/avg_order_diff_per_customer.txt b/tests/test_plan_refsols/avg_order_diff_per_customer.txt index 3f8662435..5a91d29b9 100644 --- a/tests/test_plan_refsols/avg_order_diff_per_customer.txt 
+++ b/tests/test_plan_refsols/avg_order_diff_per_customer.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('name', c_name), ('avg_diff', avg_diff)], orderings=[(avg_diff):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_diff': avg_diff, 'c_name': c_name}, orderings=[(avg_diff):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'avg_diff': AVG(day_diff)}) - PROJECT(columns={'day_diff': DATEDIFF('days':string, PREV(args=[o_orderdate], partition=[o_custkey], order=[(o_orderdate):asc_last]), o_orderdate), 'o_custkey': o_custkey}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('name', c_name), ('avg_diff', avg_diff)], orderings=[(avg_diff):desc_last], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'avg_diff': AVG(day_diff)}) + PROJECT(columns={'day_diff': DATEDIFF('days':string, PREV(args=[o_orderdate], partition=[o_custkey], order=[(o_orderdate):asc_last]), o_orderdate), 'o_custkey': o_custkey}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index df12efecf..9a59bd793 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) + LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 128fe3cf1..9aa529377 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', 
n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 128fe3cf1..9aa529377 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': 
COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt b/tests/test_plan_refsols/bad_child_reuse_4.txt index 0bcaee7d1..510790836 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 
'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last], limit=10:numeric) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index 98c75eec1..8d79ea81a 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ 
b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) + LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_aa.txt b/tests/test_plan_refsols/common_prefix_aa.txt index ce3c2be2b..71a7e20e6 100644 --- a/tests/test_plan_refsols/common_prefix_aa.txt +++ b/tests/test_plan_refsols/common_prefix_aa.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'AMERICA':string, 
columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 240fcb590..d5af6cd67 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t0.n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) FILTER(condition=n_orders > RELAVG(args=[n_orders], 
partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 23c6a811a..f42e5981e 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) + LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 6d114b0ce..33522fc5d 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', agg_1)], orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & 
DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - LIMIT(limit=Literal(value=50, type=NumericType()), columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 4ac379581..9b504757d 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,27 +1,26 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric)), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, 
type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) - LIMIT(limit=Literal(value=20, type=NumericType()), columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - LIMIT(limit=Literal(value=35, type=NumericType()), columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 
'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric)), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) + LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 
'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index 
5056ae384..c74e5591b 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(n_rows):desc_last, (n_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(n_rows):desc_last, (n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONTH(o_orderdate) == 12:numeric & YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) +ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(n_rows):desc_last, (n_name):asc_first], limit=5:numeric) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': 
n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONTH(o_orderdate) == 12:numeric & YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/common_prefix_j.txt b/tests/test_plan_refsols/common_prefix_j.txt index a338a9b8e..5cd83f892 100644 --- a/tests/test_plan_refsols/common_prefix_j.txt +++ b/tests/test_plan_refsols/common_prefix_j.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('cust_name', 
c_name), ('nation_name', n_name), ('region_name', r_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_k.txt b/tests/test_plan_refsols/common_prefix_k.txt index 4c3a0abf6..41de2f7c4 100644 --- a/tests/test_plan_refsols/common_prefix_k.txt +++ b/tests/test_plan_refsols/common_prefix_k.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('cust_name', c_name), ('region_name', r_name), ('nation_name', n_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('cust_name', c_name), ('region_name', r_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], 
limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 027e4d8e7..7ce4fe3b2 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,18 +1,17 @@ -ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), 
('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index f91eee7b1..7dda4ae8a 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,18 +1,17 @@ -ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': 
t0.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', 
DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 71e91f37d..fc3a3530b 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,26 +1,25 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': 
DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': 
l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': 
o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': 
t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 907f12e2a..1916a60a9 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,28 +1,27 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), 
('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & 
YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - 
SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 
0:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == 
t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 3d779be70..f3d26328c 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,15 +1,13 @@ 
-ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(ordering_3):asc_first, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_tax == 0:numeric, 
columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) +ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)):asc_first, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt 
b/tests/test_plan_refsols/common_prefix_q.txt index e7f6f5fd0..c9471f9aa 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('name', c_name), ('total_spent', total_spent), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], orderings=[(total_spent):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'p_name': t1.p_name}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': 
t0.o_orderkey, 'p_name': t1.p_name}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('name', c_name), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric)), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'p_name': t1.p_name}) + 
FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 355dc9ad3..50f09aa93 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,19 +1,17 @@ -ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_anything_l_extendedprice), ('total_spent', total_spent)], orderings=[(total_spent):desc_last, (c_name):asc_first]) - 
LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_anything_l_extendedprice': MAX(anything_anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_o_totalprice': SUM(o_totalprice), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'anything_anything_l_extendedprice': t1.anything_anything_l_extendedprice, 'anything_p_name': t1.anything_p_name, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'sum_n_rows': t1.sum_n_rows}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': 
o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_anything_l_extendedprice': ANYTHING(anything_l_extendedprice), 'anything_p_name': ANYTHING(p_name), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_l_extendedprice': t0.anything_l_extendedprice, 'n_rows': t0.n_rows, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) - AGGREGATE(keys={'l_partkey': l_partkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_extendedprice': ANYTHING(l_extendedprice), 'n_rows': COUNT()}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_anything_l_extendedprice), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': 
t0.c_name, 'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_anything_l_extendedprice': MAX(anything_anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_o_totalprice': SUM(o_totalprice), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'anything_anything_l_extendedprice': t1.anything_anything_l_extendedprice, 'anything_p_name': t1.anything_p_name, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'sum_n_rows': t1.sum_n_rows}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_anything_l_extendedprice': ANYTHING(anything_l_extendedprice), 'anything_p_name': ANYTHING(p_name), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_l_extendedprice': t0.anything_l_extendedprice, 'n_rows': t0.n_rows, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) + AGGREGATE(keys={'l_partkey': l_partkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_extendedprice': ANYTHING(l_extendedprice), 'n_rows': 
COUNT()}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index e350ba091..9095c85fb 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,14 +1,12 @@ -ROOT(columns=[('name', c_name), ('total_qty', total_qty)], orderings=[(total_qty):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'total_qty': total_qty}, orderings=[(total_qty):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': 
c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) +ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index b76d2a813..796e244a7 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('name', c_name), ('total_qty', total_qty)], orderings=[(total_qty):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'total_qty': total_qty}, orderings=[(total_qty):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': 
SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_tax': l_tax}) +ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': 
t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_v.txt b/tests/test_plan_refsols/common_prefix_v.txt index 2c10e35a7..3dc65ce4f 100644 --- a/tests/test_plan_refsols/common_prefix_v.txt +++ b/tests/test_plan_refsols/common_prefix_v.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_w.txt b/tests/test_plan_refsols/common_prefix_w.txt index 4b633dbd0..2d7e3a5d0 100644 --- a/tests/test_plan_refsols/common_prefix_w.txt +++ b/tests/test_plan_refsols/common_prefix_w.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('key', o_orderkey), ('cust_nation_name', n_name)], orderings=[(o_orderkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'o_orderkey': o_orderkey}, orderings=[(o_orderkey):asc_first]) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) - FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('key', o_orderkey), ('cust_nation_name', n_name)], orderings=[(o_orderkey):asc_first], 
limit=5:numeric) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) + FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index 9a5054bce..9de4be686 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,12 +1,10 @@ -ROOT(columns=[('name', c_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_orders': n_orders}, orderings=[(n_orders):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 
'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) +ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index 5a92df8a2..938d31871 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,12 +1,10 @@ -ROOT(columns=[('name', c_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_orders': n_orders}, orderings=[(n_orders):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'n_orders': DEFAULT_TO(n_rows, 
0:numeric)}) - FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'c_name': c_name, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) +ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'c_name': c_name, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_z.txt b/tests/test_plan_refsols/common_prefix_z.txt index 75f223d8a..23dd535b3 100644 --- a/tests/test_plan_refsols/common_prefix_z.txt +++ b/tests/test_plan_refsols/common_prefix_z.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_19.txt 
b/tests/test_plan_refsols/correl_19.txt index 528439e8a..a34916e67 100644 --- a/tests/test_plan_refsols/correl_19.txt +++ b/tests/test_plan_refsols/correl_19.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('supplier_name', supplier_name), ('n_super_cust', n_super_cust)], orderings=[(n_super_cust):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_super_cust': n_super_cust, 'supplier_name': supplier_name}, orderings=[(n_super_cust):desc_last]) - AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'n_super_cust': COUNT(), 'supplier_name': ANYTHING(s_name)}) - FILTER(condition=c_acctbal > s_acctbal, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) +ROOT(columns=[('supplier_name', supplier_name), ('n_super_cust', n_super_cust)], orderings=[(n_super_cust):desc_last], limit=5:numeric) + AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'n_super_cust': COUNT(), 'supplier_name': ANYTHING(s_name)}) + FILTER(condition=c_acctbal > s_acctbal, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_22.txt b/tests/test_plan_refsols/correl_22.txt index 97f8f16d3..88b3d92e9 100644 --- a/tests/test_plan_refsols/correl_22.txt +++ b/tests/test_plan_refsols/correl_22.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('container', p_container), ('n_types', n_types)], orderings=[(n_types):desc_last, (p_container):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_types': n_types, 'p_container': p_container}, orderings=[(n_types):desc_last, (p_container):asc_first]) - AGGREGATE(keys={'p_container': p_container}, aggregations={'n_types': COUNT()}) - FILTER(condition=avg_p_retailprice > global_avg_price, columns={'p_container': p_container}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'global_avg_price': t0.global_avg_price, 'p_container': t1.p_container}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - AGGREGATE(keys={'p_container': p_container, 'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_retailprice': p_retailprice, 'p_type': p_type}) +ROOT(columns=[('container', p_container), ('n_types', n_types)], orderings=[(n_types):desc_last, (p_container):asc_first], limit=5:numeric) + AGGREGATE(keys={'p_container': p_container}, aggregations={'n_types': COUNT()}) + FILTER(condition=avg_p_retailprice > global_avg_price, 
columns={'p_container': p_container}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'global_avg_price': t0.global_avg_price, 'p_container': t1.p_container}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + AGGREGATE(keys={'p_container': p_container, 'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/correl_25.txt b/tests/test_plan_refsols/correl_25.txt index 8de29facd..47e41917c 100644 --- a/tests/test_plan_refsols/correl_25.txt +++ b/tests/test_plan_refsols/correl_25.txt @@ -1,21 +1,20 @@ -ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', anything_r_regionkey), ('cust_nation_name', anything_n_name), ('cust_nation_key', anything_n_nationkey), ('customer_name', anything_c_name), ('n_urgent_semi_domestic_rail_orders', n_urgent_semi_domestic_rail_orders)], orderings=[(n_urgent_semi_domestic_rail_orders):desc_last, (anything_c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name, 'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_r_name': anything_r_name, 'anything_r_regionkey': anything_r_regionkey, 'n_urgent_semi_domestic_rail_orders': n_urgent_semi_domestic_rail_orders}, orderings=[(n_urgent_semi_domestic_rail_orders):desc_last, (anything_c_name):asc_first]) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_r_name': ANYTHING(r_name), 'anything_r_regionkey': ANYTHING(r_regionkey), 'n_urgent_semi_domestic_rail_orders': 
NDISTINCT(l_orderkey)}) - FILTER(condition=name_12 != n_name & expr_3 == r_name, columns={'c_custkey': c_custkey, 'c_name': c_name, 'l_orderkey': l_orderkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'expr_3': t1.r_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'name_12': t1.n_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric & o_orderpriority == 
'1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', anything_r_regionkey), ('cust_nation_name', anything_n_name), ('cust_nation_key', anything_n_nationkey), ('customer_name', anything_c_name), ('n_urgent_semi_domestic_rail_orders', n_urgent_semi_domestic_rail_orders)], orderings=[(n_urgent_semi_domestic_rail_orders):desc_last, (anything_c_name):asc_first], limit=5:numeric) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_r_name': ANYTHING(r_name), 'anything_r_regionkey': ANYTHING(r_regionkey), 'n_urgent_semi_domestic_rail_orders': NDISTINCT(l_orderkey)}) + FILTER(condition=name_12 != n_name & expr_3 == r_name, columns={'c_custkey': c_custkey, 'c_name': c_name, 'l_orderkey': l_orderkey, 
'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'expr_3': t1.r_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'name_12': t1.n_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 
'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_32.txt b/tests/test_plan_refsols/correl_32.txt index e1c8d129c..578c3368b 100644 --- a/tests/test_plan_refsols/correl_32.txt +++ b/tests/test_plan_refsols/correl_32.txt @@ -1,13 +1,11 @@ -ROOT(columns=[('customer_name', anything_c_name), ('delta', delta)], orderings=[(delta):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name, 'delta': delta}, orderings=[(delta):asc_first]) - PROJECT(columns={'anything_c_name': anything_c_name, 'delta': ABS(anything_c_acctbal - median_s_acctbal)}) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_name': ANYTHING(c_name), 'median_s_acctbal': MEDIAN(s_acctbal)}) - JOIN(condition=SLICE(t1.s_phone, -1:numeric, None:unknown, None:unknown) == SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 's_acctbal': 
t1.s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'MIDDLE EAST':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_phone': s_phone}) +ROOT(columns=[('customer_name', anything_c_name), ('delta', ABS(anything_c_acctbal - median_s_acctbal))], orderings=[(ABS(anything_c_acctbal - median_s_acctbal)):asc_first], limit=5:numeric) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_name': ANYTHING(c_name), 'median_s_acctbal': MEDIAN(s_acctbal)}) + JOIN(condition=SLICE(t1.s_phone, -1:numeric, None:unknown, None:unknown) == SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'MIDDLE EAST':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_phone': s_phone}) diff --git a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt index 23a78a9e5..3c7ff73c0 100644 --- a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt +++ b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt @@ -2,5 +2,5 @@ ROOT(columns=[('name', n_name), ('n_top_suppliers', DEFAULT_TO(count_s_suppkey, JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - LIMIT(limit=Literal(value=100, type=NumericType()), 
columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}, orderings=[(s_acctbal):asc_last]) + LIMIT(limit=100:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}, orderings=[(s_acctbal):asc_last]) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 1fa05bd28..97aba3b36 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('name', c_name), ('largest_diff', largest_diff)], orderings=[(largest_diff):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) - PROJECT(columns={'c_name': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) - FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_diff': MAX(revenue_delta), 'min_diff': MIN(revenue_delta)}) - PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) - FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(r)}) - PROJECT(columns={'l_orderkey': l_orderkey, 'r': l_extendedprice * 1:numeric - l_discount}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_shipmode == 'AIR':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) +ROOT(columns=[('name', c_name), ('largest_diff', IFF(ABS(min_diff) > max_diff, min_diff, max_diff))], orderings=[(IFF(ABS(min_diff) > max_diff, min_diff, max_diff)):desc_last], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) + FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_diff': MAX(revenue_delta), 'min_diff': MIN(revenue_delta)}) + PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) + FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) + JOIN(condition=t0.o_orderkey 
== t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(r)}) + PROJECT(columns={'l_orderkey': l_orderkey, 'r': l_extendedprice * 1:numeric - l_discount}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_shipmode == 'AIR':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index 52700565c..5f000a001 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,8 +1,6 @@ -ROOT(columns=[('name', c_name), ('total_recent_value', total_recent_value)], orderings=[(total_recent_value):desc_last]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) - PROJECT(columns={'c_name': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=RANKING(args=[], 
partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) +ROOT(columns=[('name', c_name), ('total_recent_value', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last], limit=3:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/datetime_relative.txt b/tests/test_plan_refsols/datetime_relative.txt index ddd91f4af..be99d1ef3 100644 --- a/tests/test_plan_refsols/datetime_relative.txt +++ b/tests/test_plan_refsols/datetime_relative.txt @@ -1,3 +1,3 @@ ROOT(columns=[('d1', DATETIME(o_orderdate, 'Start of Year':string)), ('d2', DATETIME(o_orderdate, 'START OF MONTHS':string)), ('d3', DATETIME(o_orderdate, '-11 years':string, '+9 months':string, ' - 7 DaYs ':string, '+5 h':string, '-3 minutes':string, '+1 second':string)), ('d4', DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of hour':string)), ('d5', DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of minute':string)), ('d6', DATETIME(Timestamp('2025-07-14 12:58:45'):datetime, '+ 1000000 seconds':string))], 
orderings=[(o_orderdate):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_custkey):asc_first, (o_orderdate):asc_first]) + LIMIT(limit=10:numeric, columns={'o_orderdate': o_orderdate}, orderings=[(o_custkey):asc_first, (o_orderdate):asc_first]) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index b1b75726f..bccadf51a 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,35 +1,33 @@ -ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', c_custkey), ('c_bal', c_acctbal), ('cr_bal', cr_bal), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', cg_key)], orderings=[(n_name):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cg_key': cg_key, 'cr_bal': cr_bal, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}, orderings=[(n_name):asc_first]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cg_key': key_54, 'cr_bal': account_balance_21, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'key_54': t1.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 
'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) +ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', c_custkey), ('c_bal', c_acctbal), ('cr_bal', account_balance_21), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', key_54)], orderings=[(n_name):asc_first], limit=10:numeric) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'key_54': t1.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': 
t0.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'r_regionkey': 
r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, 
columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_regionkey': t0.r_regionkey, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': 
t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_regionkey': 
t0.r_regionkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/dumb_aggregation.txt b/tests/test_plan_refsols/dumb_aggregation.txt index 2604e2675..2a9906b01 100644 --- a/tests/test_plan_refsols/dumb_aggregation.txt +++ b/tests/test_plan_refsols/dumb_aggregation.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('a1', r_name), ('a2', r_name), ('a3', DEFAULT_TO(r_regionkey, 0:numeric)), 
('a4', IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric)), ('a5', 1:numeric), ('a6', r_regionkey), ('a7', r_name), ('a8', r_regionkey)], orderings=[(n_name):asc_first]) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + LIMIT(limit=2:numeric, columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index d2b29d459..ebf3fce9c 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEAR(ev_dt)), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first]) - LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 
'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) - FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) +ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEAR(ev_dt)), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first], limit=6:numeric) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) + SCAN(table=ERAS, 
columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt index 32d0a2e7c..833fdf684 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('user_name', anything_user_name), ('n_other_users', n_other_users)], orderings=[(n_other_users):desc_last, (anything_user_name):asc_first]) - LIMIT(limit=Literal(value=7, type=NumericType()), columns={'anything_user_name': anything_user_name, 'n_other_users': n_other_users}, orderings=[(n_other_users):desc_last, (anything_user_name):asc_first]) - AGGREGATE(keys={'user_id': user_id}, aggregations={'anything_user_name': ANYTHING(user_name), 'n_other_users': NDISTINCT(user_id_11)}) - FILTER(condition=name_9 != user_name, columns={'user_id': user_id, 'user_id_11': user_id_11, 'user_name': user_name}) - JOIN(condition=t0.search_user_id == t1.user_id, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=EVENTS, columns={'ev_name': ev_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) +ROOT(columns=[('user_name', anything_user_name), ('n_other_users', n_other_users)], orderings=[(n_other_users):desc_last, (anything_user_name):asc_first], limit=7:numeric) + AGGREGATE(keys={'user_id': user_id}, aggregations={'anything_user_name': ANYTHING(user_name), 'n_other_users': NDISTINCT(user_id_11)}) + FILTER(condition=name_9 != user_name, columns={'user_id': user_id, 'user_id_11': user_id_11, 'user_name': user_name}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + 
JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=EVENTS, columns={'ev_name': ev_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt index 37d9d53b4..91e424686 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt @@ -1,15 +1,14 @@ -ROOT(columns=[('user_name', anything_anything_user_name), ('n_searches', n_searches)], orderings=[(n_searches):desc_last, (anything_anything_user_name):asc_first]) - LIMIT(limit=Literal(value=4, type=NumericType()), columns={'anything_anything_user_name': anything_anything_user_name, 'n_searches': n_searches}, orderings=[(n_searches):desc_last, (anything_anything_user_name):asc_first]) - AGGREGATE(keys={'anything_user_id': anything_user_id}, aggregations={'anything_anything_user_name': ANYTHING(anything_user_name), 'n_searches': COUNT()}) - FILTER(condition=n_rows > 0:numeric, columns={'anything_user_id': anything_user_id, 'anything_user_name': anything_user_name}) - AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, aggregations={'anything_user_id': ANYTHING(user_id), 'anything_user_name': ANYTHING(user_name), 'n_rows': COUNT()}) - 
FILTER(condition=name_9 != user_name, columns={'search_id': search_id, 'user_id': user_id, 'user_name': user_name}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=EVENTS, columns={'ev_name': ev_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) +ROOT(columns=[('user_name', anything_anything_user_name), ('n_searches', n_searches)], orderings=[(n_searches):desc_last, (anything_anything_user_name):asc_first], limit=4:numeric) + AGGREGATE(keys={'anything_user_id': anything_user_id}, aggregations={'anything_anything_user_name': ANYTHING(anything_user_name), 'n_searches': COUNT()}) + FILTER(condition=n_rows > 0:numeric, columns={'anything_user_id': anything_user_id, 'anything_user_name': anything_user_name}) + AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, aggregations={'anything_user_id': ANYTHING(user_id), 
'anything_user_name': ANYTHING(user_name), 'n_rows': COUNT()}) + FILTER(condition=name_9 != user_name, columns={'search_id': search_id, 'user_id': user_id, 'user_name': user_name}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=EVENTS, columns={'ev_name': ev_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) diff --git a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt index fcbb954a6..32fa17508 100644 --- a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt +++ b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt @@ -1,12 +1,11 @@ -ROOT(columns=[('user_name', user_name), ('n_cold_war_searches', n_cold_war_searches)], orderings=[(n_cold_war_searches):desc_last, (user_name):asc_first]) - LIMIT(limit=Literal(value=3, 
type=NumericType()), columns={'n_cold_war_searches': n_cold_war_searches, 'user_name': user_name}, orderings=[(n_cold_war_searches):desc_last, (user_name):asc_first]) - JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) - AGGREGATE(keys={'anything_search_user_id': anything_search_user_id}, aggregations={'n_cold_war_searches': COUNT()}) - AGGREGATE(keys={'search_id': search_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id)}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) - FILTER(condition=er_name == 'Cold War':string, columns={'er_end_year': er_end_year, 'er_start_year': er_start_year}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) +ROOT(columns=[('user_name', user_name), ('n_cold_war_searches', n_cold_war_searches)], orderings=[(n_cold_war_searches):desc_last, (user_name):asc_first], limit=3:numeric) + JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) + AGGREGATE(keys={'anything_search_user_id': anything_search_user_id}, 
aggregations={'n_cold_war_searches': COUNT()}) + AGGREGATE(keys={'search_id': search_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id)}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) + FILTER(condition=er_name == 'Cold War':string, columns={'er_end_year': er_end_year, 'er_start_year': er_start_year}) + SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) diff --git a/tests/test_plan_refsols/exponentiation.txt b/tests/test_plan_refsols/exponentiation.txt index 0c1fec6b1..ad7f1ae11 100644 --- a/tests/test_plan_refsols/exponentiation.txt +++ b/tests/test_plan_refsols/exponentiation.txt @@ -1,4 +1,2 @@ -ROOT(columns=[('low_square', low_square), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(low_square):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'low_square': low_square, 'sbDpLow': sbDpLow}, orderings=[(low_square):asc_first]) - PROJECT(columns={'low_square': sbDpLow ** 2:numeric, 'sbDpLow': sbDpLow}) - SCAN(table=main.sbDailyPrice, columns={'sbDpLow': sbDpLow}) +ROOT(columns=[('low_square', sbDpLow ** 2:numeric), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(sbDpLow ** 2:numeric):asc_first], limit=10:numeric) + SCAN(table=main.sbDailyPrice, columns={'sbDpLow': sbDpLow}) diff --git 
a/tests/test_plan_refsols/first_order_per_customer.txt b/tests/test_plan_refsols/first_order_per_customer.txt index f1b37b331..ef68303cf 100644 --- a/tests/test_plan_refsols/first_order_per_customer.txt +++ b/tests/test_plan_refsols/first_order_per_customer.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('name', c_name), ('first_order_date', o_orderdate), ('first_order_price', o_totalprice)], orderings=[(o_totalprice):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) - FILTER(condition=c_acctbal >= 9000.0:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) +ROOT(columns=[('name', c_name), ('first_order_date', o_orderdate), ('first_order_price', o_totalprice)], orderings=[(o_totalprice):desc_last], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) + FILTER(condition=c_acctbal >= 9000.0:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, 
(o_orderkey):asc_last]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/floor_and_ceil_2.txt b/tests/test_plan_refsols/floor_and_ceil_2.txt index 29b9464c1..8a0835a61 100644 --- a/tests/test_plan_refsols/floor_and_ceil_2.txt +++ b/tests/test_plan_refsols/floor_and_ceil_2.txt @@ -1,4 +1,2 @@ -ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', FLOOR(ps_availqty)), ('total_cost', total_cost)], orderings=[(total_cost):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': total_cost}, orderings=[(total_cost):desc_last]) - PROJECT(columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': CEIL(ps_supplycost * FLOOR(ps_availqty))}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', FLOOR(ps_availqty)), ('total_cost', CEIL(ps_supplycost * FLOOR(ps_availqty)))], orderings=[(CEIL(ps_supplycost * FLOOR(ps_availqty))):desc_last], limit=10:numeric) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/function_sampler.txt b/tests/test_plan_refsols/function_sampler.txt index b3bce5eaf..f6aa4aa45 100644 --- a/tests/test_plan_refsols/function_sampler.txt +++ b/tests/test_plan_refsols/function_sampler.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('a', JOIN_STRINGS('-':string, r_name, n_name, SLICE(c_name, 16:numeric, None:unknown, None:unknown))), 
('b', ROUND(c_acctbal, 1:numeric)), ('c', KEEP_IF(c_name, SLICE(c_phone, None:unknown, 1:numeric, None:unknown) == '3':string)), ('d', PRESENT(KEEP_IF(c_name, SLICE(c_phone, 1:numeric, 2:numeric, None:unknown) == '1':string))), ('e', ABSENT(KEEP_IF(c_name, SLICE(c_phone, 14:numeric, None:unknown, None:unknown) == '7':string))), ('f', ROUND(c_acctbal))], orderings=[(c_address):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_address):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=MONOTONIC(0.0:numeric, c_acctbal, 100.0:numeric), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) +ROOT(columns=[('a', JOIN_STRINGS('-':string, r_name, n_name, SLICE(c_name, 16:numeric, None:unknown, None:unknown))), ('b', ROUND(c_acctbal, 1:numeric)), ('c', KEEP_IF(c_name, SLICE(c_phone, None:unknown, 1:numeric, None:unknown) == '3':string)), ('d', PRESENT(KEEP_IF(c_name, SLICE(c_phone, 1:numeric, 2:numeric, None:unknown) == '1':string))), ('e', ABSENT(KEEP_IF(c_name, SLICE(c_phone, 14:numeric, None:unknown, None:unknown) == 
'7':string))), ('f', ROUND(c_acctbal))], orderings=[(c_address):asc_first], limit=10:numeric) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=MONOTONIC(0.0:numeric, c_acctbal, 100.0:numeric), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/join_topk.txt b/tests/test_plan_refsols/join_topk.txt index fc19c4da5..f92640023 100644 --- a/tests/test_plan_refsols/join_topk.txt +++ b/tests/test_plan_refsols/join_topk.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(n_name):asc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'n_name': n_name, 'r_name': r_name}, orderings=[(n_name):asc_last]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) +ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(n_name):asc_last], limit=10:numeric) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/minutes_seconds_datediff.txt b/tests/test_plan_refsols/minutes_seconds_datediff.txt index 4ae64d60c..ff29298d5 100644 --- a/tests/test_plan_refsols/minutes_seconds_datediff.txt +++ b/tests/test_plan_refsols/minutes_seconds_datediff.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('x', sbTxDateTime), ('y', datetime.datetime(2023, 4, 3, 13, 16, 30):datetime), ('minutes_diff', DATEDIFF('m':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime)), ('seconds_diff', DATEDIFF('s':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime))], orderings=[(sbTxDateTime):desc_last]) - LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime}, orderings=[(sbTxDateTime):desc_last]) - FILTER(condition=YEAR(sbTxDateTime) <= 2024:numeric, columns={'sbTxDateTime': sbTxDateTime}) - SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) +ROOT(columns=[('x', sbTxDateTime), ('y', datetime.datetime(2023, 4, 3, 13, 16, 30):datetime), ('minutes_diff', DATEDIFF('m':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime)), ('seconds_diff', DATEDIFF('s':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime))], orderings=[(sbTxDateTime):desc_last], limit=30:numeric) + FILTER(condition=YEAR(sbTxDateTime) <= 2024:numeric, columns={'sbTxDateTime': sbTxDateTime}) + SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git a/tests/test_plan_refsols/multi_partition_access_1.txt b/tests/test_plan_refsols/multi_partition_access_1.txt index d4830999a..77ac57e0a 100644 --- a/tests/test_plan_refsols/multi_partition_access_1.txt +++ b/tests/test_plan_refsols/multi_partition_access_1.txt @@ -1,3 +1,2 @@ 
-ROOT(columns=[('symbol', sbTickerSymbol)], orderings=[(sbTickerSymbol):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'sbTickerSymbol': sbTickerSymbol}, orderings=[(sbTickerSymbol):asc_first]) - SCAN(table=main.sbTicker, columns={'sbTickerSymbol': sbTickerSymbol}) +ROOT(columns=[('symbol', sbTickerSymbol)], orderings=[(sbTickerSymbol):asc_first], limit=5:numeric) + SCAN(table=main.sbTicker, columns={'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/order_by_expression.txt b/tests/test_plan_refsols/order_by_expression.txt index f9509574f..07a7a916f 100644 --- a/tests/test_plan_refsols/order_by_expression.txt +++ b/tests/test_plan_refsols/order_by_expression.txt @@ -1,4 +1,2 @@ -ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(ordering_1):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'ordering_1': ordering_1, 'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(ordering_1):asc_first]) - PROJECT(columns={'ordering_1': LENGTH(r_name), 'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(LENGTH(r_name)):asc_first], limit=10:numeric) + SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/order_quarter_test.txt b/tests/test_plan_refsols/order_quarter_test.txt index 8666a4eba..d03ce7a7e 100644 --- a/tests/test_plan_refsols/order_quarter_test.txt +++ b/tests/test_plan_refsols/order_quarter_test.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('order_date', o_orderdate), ('quarter', QUARTER(o_orderdate)), ('quarter_start', DATETIME(o_orderdate, 'start of quarter':string)), ('next_quarter', DATETIME(o_orderdate, '+1 quarter':string)), 
('prev_quarter', DATETIME(o_orderdate, '-1 quarter':string)), ('two_quarters_ahead', DATETIME(o_orderdate, '+2 quarters':string)), ('two_quarters_behind', DATETIME(o_orderdate, '-2 quarters':string)), ('quarters_since_1995', DATEDIFF('quarter':string, '1995-01-01':string, o_orderdate)), ('quarters_until_2000', DATEDIFF('quarter':string, o_orderdate, '2000-01-01':string)), ('same_quarter_prev_year', DATETIME(o_orderdate, '-4 quarters':string)), ('same_quarter_next_year', DATETIME(o_orderdate, '+4 quarters':string))], orderings=[(o_orderdate):asc_first]) - LIMIT(limit=Literal(value=1, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_first]) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) +ROOT(columns=[('order_date', o_orderdate), ('quarter', QUARTER(o_orderdate)), ('quarter_start', DATETIME(o_orderdate, 'start of quarter':string)), ('next_quarter', DATETIME(o_orderdate, '+1 quarter':string)), ('prev_quarter', DATETIME(o_orderdate, '-1 quarter':string)), ('two_quarters_ahead', DATETIME(o_orderdate, '+2 quarters':string)), ('two_quarters_behind', DATETIME(o_orderdate, '-2 quarters':string)), ('quarters_since_1995', DATEDIFF('quarter':string, '1995-01-01':string, o_orderdate)), ('quarters_until_2000', DATEDIFF('quarter':string, o_orderdate, '2000-01-01':string)), ('same_quarter_prev_year', DATETIME(o_orderdate, '-4 quarters':string)), ('same_quarter_next_year', DATETIME(o_orderdate, '+4 quarters':string))], orderings=[(o_orderdate):asc_first], limit=1:numeric) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 254b7ce5c..cf6a02f43 100644 --- 
a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,12 +1,10 @@ -ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', days_since_first_order)], orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'days_since_first_order': days_since_first_order, 'o_orderkey': o_orderkey}, orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'days_since_first_order': DATEDIFF('days':string, order_date_8, o_orderdate), 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) +ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', DATEDIFF('days':string, order_date_8, o_orderdate))], orderings=[(DATEDIFF('days':string, order_date_8, o_orderdate)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'VIETNAM':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'VIETNAM':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/padding_functions.txt b/tests/test_plan_refsols/padding_functions.txt index f511fb5a2..587435263 100644 --- a/tests/test_plan_refsols/padding_functions.txt +++ b/tests/test_plan_refsols/padding_functions.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('original_name', sbCustName), ('ref_rpad', RPAD('Cust0001':string, 30:numeric, '*':string)), ('ref_lpad', LPAD('Cust0001':string, 30:numeric, '*':string)), ('right_padded', RPAD(sbCustName, 30:numeric, '*':string)), ('left_padded', 
LPAD(sbCustName, 30:numeric, '#':string)), ('truncated_right', RPAD(sbCustName, 8:numeric, '-':string)), ('truncated_left', LPAD(sbCustName, 8:numeric, '-':string)), ('zero_pad_right', RPAD(sbCustName, 0:numeric, '.':string)), ('zero_pad_left', LPAD(sbCustName, 0:numeric, '.':string)), ('right_padded_space', RPAD(sbCustName, 30:numeric, ' ':string)), ('left_padded_space', LPAD(sbCustName, 30:numeric, ' ':string))], orderings=[(sbCustName):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'sbCustName': sbCustName}, orderings=[(sbCustName):asc_first]) - SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) +ROOT(columns=[('original_name', sbCustName), ('ref_rpad', RPAD('Cust0001':string, 30:numeric, '*':string)), ('ref_lpad', LPAD('Cust0001':string, 30:numeric, '*':string)), ('right_padded', RPAD(sbCustName, 30:numeric, '*':string)), ('left_padded', LPAD(sbCustName, 30:numeric, '#':string)), ('truncated_right', RPAD(sbCustName, 8:numeric, '-':string)), ('truncated_left', LPAD(sbCustName, 8:numeric, '-':string)), ('zero_pad_right', RPAD(sbCustName, 0:numeric, '.':string)), ('zero_pad_left', LPAD(sbCustName, 0:numeric, '.':string)), ('right_padded_space', RPAD(sbCustName, 30:numeric, ' ':string)), ('left_padded_space', LPAD(sbCustName, 30:numeric, ' ':string))], orderings=[(sbCustName):asc_first], limit=5:numeric) + SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index 9bd706f37..a4d8aee87 100644 --- a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, 
'%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'l_discount': l_discount, 'l_receiptdate': l_receiptdate, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(l_discount):desc_last]) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_size': t0.p_size, 'retail_price_int': t0.retail_price_int}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) +ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last], limit=5:numeric) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_size': t0.p_size, 'retail_price_int': t0.retail_price_int}) + LIMIT(limit=2:numeric, columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) + PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) + SCAN(table=tpch.PART, 
columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 7d3cc9e81..99a8d19a6 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,19 +1,17 @@ -ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(ordering_2):desc_last, (p_name):asc_first]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'agg_1': agg_1, 'ordering_2': ordering_2, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) - PROJECT(columns={'agg_1': agg_1, 'ordering_2': DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric), 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 
'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) +ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (p_name):asc_first], limit=3:numeric) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': 
SUM(l_quantity)}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/quantile_function_test_2.txt b/tests/test_plan_refsols/quantile_function_test_2.txt index 4d612371a..a500ccec6 100644 --- a/tests/test_plan_refsols/quantile_function_test_2.txt +++ b/tests/test_plan_refsols/quantile_function_test_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) JOIN(condition=t0.n_nationkey 
== t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) diff --git a/tests/test_plan_refsols/quantile_function_test_3.txt b/tests/test_plan_refsols/quantile_function_test_3.txt index 4d612371a..a500ccec6 100644 --- a/tests/test_plan_refsols/quantile_function_test_3.txt +++ b/tests/test_plan_refsols/quantile_function_test_3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), 
('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) diff --git a/tests/test_plan_refsols/quantile_function_test_4.txt b/tests/test_plan_refsols/quantile_function_test_4.txt index 0059ac367..c9a18d116 100644 --- a/tests/test_plan_refsols/quantile_function_test_4.txt +++ b/tests/test_plan_refsols/quantile_function_test_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), 
('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 720bf6ef4..1e519b6c2 100644 --- a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,9 +1,7 @@ 
-ROOT(columns=[('name', n_name), ('rank', rank)], orderings=[(rank):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'rank': rank}, orderings=[(rank):asc_first]) - PROJECT(columns={'n_name': n_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) +ROOT(columns=[('name', n_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first]))], orderings=[(RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])):asc_first], limit=5:numeric) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt 
b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt index 9372d9bb7..7fd4a68da 100644 --- a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt +++ b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt @@ -1,12 +1,10 @@ -ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', rank)], orderings=[(p_partkey):asc_first]) - LIMIT(limit=Literal(value=15, type=NumericType()), columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': rank}, orderings=[(p_partkey):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size, 
'p_type': p_type}) +ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True))], orderings=[(p_partkey):asc_first], limit=15:numeric) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/rank_with_filters_c.txt b/tests/test_plan_refsols/rank_with_filters_c.txt index 1a2c4b75c..52ebabdc7 100644 --- a/tests/test_plan_refsols/rank_with_filters_c.txt +++ b/tests/test_plan_refsols/rank_with_filters_c.txt @@ -1,7 +1,7 @@ ROOT(columns=[('pname', p_name), ('psize', size_3)], orderings=[]) FILTER(condition=RANKING(args=[], partition=[p_size], order=[(p_retailprice):desc_first]) == 1:numeric, 
columns={'p_name': p_name, 'size_3': size_3}) JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_size': t0.p_size, 'size_3': t1.p_size}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'p_size': p_size}, orderings=[(p_size):desc_last]) + LIMIT(limit=5:numeric, columns={'p_size': p_size}, orderings=[(p_size):desc_last]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) SCAN(table=tpch.PART, columns={'p_size': p_size}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/sign.txt b/tests/test_plan_refsols/sign.txt index 7c2d6ac5c..2e7984bf3 100644 --- a/tests/test_plan_refsols/sign.txt +++ b/tests/test_plan_refsols/sign.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('high', sbDpHigh), ('high_neg', -1:numeric * sbDpHigh), ('high_zero', 0:numeric * sbDpHigh), ('sign_high', SIGN(sbDpHigh)), ('sign_high_neg', SIGN(-1:numeric * sbDpHigh)), ('sign_high_zero', SIGN(0:numeric * sbDpHigh))], orderings=[(sbDpHigh):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'sbDpHigh': sbDpHigh}, orderings=[(sbDpHigh):asc_first]) - SCAN(table=main.sbDailyPrice, columns={'sbDpHigh': sbDpHigh}) +ROOT(columns=[('high', sbDpHigh), ('high_neg', -1:numeric * sbDpHigh), ('high_zero', 0:numeric * sbDpHigh), ('sign_high', SIGN(sbDpHigh)), ('sign_high_neg', SIGN(-1:numeric * sbDpHigh)), ('sign_high_zero', SIGN(0:numeric * sbDpHigh))], orderings=[(sbDpHigh):asc_first], limit=5:numeric) + SCAN(table=main.sbDailyPrice, columns={'sbDpHigh': sbDpHigh}) diff --git a/tests/test_plan_refsols/simple_cross_5.txt b/tests/test_plan_refsols/simple_cross_5.txt index 995757451..c3fd447c1 100644 --- a/tests/test_plan_refsols/simple_cross_5.txt +++ b/tests/test_plan_refsols/simple_cross_5.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), 
('best_order_priority_qty', total_qty)], orderings=[(p_size):asc_first]) JOIN(condition=t0.p_size == t1.anything_p_size, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size, 'total_qty': t1.total_qty}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'p_size': p_size}, orderings=[(p_size):asc_first]) + LIMIT(limit=10:numeric, columns={'p_size': p_size}, orderings=[(p_size):asc_first]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_size': p_size}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_size': p_size}) @@ -10,7 +10,7 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), ( JOIN(condition=t0.l_partkey == t1.p_partkey & t1.p_size == t0.p_size, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': t0.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'p_size': p_size}, orderings=[(p_size):asc_first]) + LIMIT(limit=10:numeric, columns={'p_size': p_size}, orderings=[(p_size):asc_first]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_size': p_size}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_cross_7.txt b/tests/test_plan_refsols/simple_cross_7.txt index 512241e07..e88f1ca6d 100644 --- a/tests/test_plan_refsols/simple_cross_7.txt +++ b/tests/test_plan_refsols/simple_cross_7.txt 
@@ -1,13 +1,11 @@ -ROOT(columns=[('original_order_key', o_orderkey), ('n_other_orders', n_other_orders)], orderings=[(n_other_orders):desc_last, (o_orderkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_other_orders': n_other_orders, 'o_orderkey': o_orderkey}, orderings=[(n_other_orders):desc_last, (o_orderkey):asc_first]) - PROJECT(columns={'n_other_orders': DEFAULT_TO(n_rows, 0:numeric), 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_orderkey': t0.o_orderkey}) - FILTER(condition=o_orderstatus == 'P':string, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=customer_key_3 == o_custkey & order_date_5 == o_orderdate & key_4 > o_orderkey, columns={'o_orderkey': o_orderkey}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_key_3': t1.o_custkey, 'key_4': t1.o_orderkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_5': t1.o_orderdate}) - FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) - FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) +ROOT(columns=[('original_order_key', o_orderkey), ('n_other_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (o_orderkey):asc_first], 
limit=5:numeric) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_orderkey': t0.o_orderkey}) + FILTER(condition=o_orderstatus == 'P':string, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=customer_key_3 == o_custkey & order_date_5 == o_orderdate & key_4 > o_orderkey, columns={'o_orderkey': o_orderkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_key_3': t1.o_custkey, 'key_4': t1.o_orderkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_5': t1.o_orderdate}) + FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) + FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) diff --git a/tests/test_plan_refsols/simple_cross_9.txt b/tests/test_plan_refsols/simple_cross_9.txt index 95f9968bd..805800c40 100644 --- a/tests/test_plan_refsols/simple_cross_9.txt +++ b/tests/test_plan_refsols/simple_cross_9.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('n1', n_name), ('n2', name_9)], orderings=[(n_name):asc_first, (name_9):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'n_name': n_name, 'name_9': name_9}, orderings=[(n_name):asc_first, (name_9):asc_first]) - FILTER(condition=n_name != name_9, columns={'n_name': n_name, 'name_9': name_9}) - JOIN(condition=t0.r_regionkey == 
t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'name_9': t1.n_name}) - JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) +ROOT(columns=[('n1', n_name), ('n2', name_9)], orderings=[(n_name):asc_first, (name_9):asc_first], limit=10:numeric) + FILTER(condition=n_name != name_9, columns={'n_name': n_name, 'name_9': name_9}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'name_9': t1.n_name}) + JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_filter_top_five.txt b/tests/test_plan_refsols/simple_filter_top_five.txt index f9546284c..393975351 100644 --- a/tests/test_plan_refsols/simple_filter_top_five.txt +++ b/tests/test_plan_refsols/simple_filter_top_five.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('key', o_orderkey)], orderings=[(o_orderkey):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), 
columns={'o_orderkey': o_orderkey}, orderings=[(o_orderkey):desc_last]) - FILTER(condition=o_totalprice < 1000.0:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) +ROOT(columns=[('key', o_orderkey)], orderings=[(o_orderkey):desc_last], limit=5:numeric) + FILTER(condition=o_totalprice < 1000.0:numeric, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/simple_scan_top_five.txt b/tests/test_plan_refsols/simple_scan_top_five.txt index 55511d884..796a57a65 100644 --- a/tests/test_plan_refsols/simple_scan_top_five.txt +++ b/tests/test_plan_refsols/simple_scan_top_five.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('key', o_orderkey)], orderings=[(o_orderkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'o_orderkey': o_orderkey}, orderings=[(o_orderkey):asc_first]) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) +ROOT(columns=[('key', o_orderkey)], orderings=[(o_orderkey):asc_first], limit=5:numeric) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/simple_topk.txt b/tests/test_plan_refsols/simple_topk.txt index e7159d251..361fe25b0 100644 --- a/tests/test_plan_refsols/simple_topk.txt +++ b/tests/test_plan_refsols/simple_topk.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):asc_last]) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(r_name):asc_last]) - SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):asc_last], limit=2:numeric) + SCAN(table=tpch.REGION, columns={'r_comment': 
r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/singular3.txt b/tests/test_plan_refsols/singular3.txt index 646e5c0b3..6d1870895 100644 --- a/tests/test_plan_refsols/singular3.txt +++ b/tests/test_plan_refsols/singular3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_custkey': c_custkey, 'c_name': c_name}, orderings=[(c_name):asc_first]) + LIMIT(limit=5:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}, orderings=[(c_name):asc_first]) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index 889ab189c..77ed0b3c7 100644 --- a/tests/test_plan_refsols/singular4.txt +++ b/tests/test_plan_refsols/singular4.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) - FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], 
order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) +ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/singular5.txt b/tests/test_plan_refsols/singular5.txt index 8c1a46891..20517bc26 100644 --- a/tests/test_plan_refsols/singular5.txt +++ b/tests/test_plan_refsols/singular5.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('container', p_container), ('highest_price_ship_date', max_anything_l_shipdate)], orderings=[(max_anything_l_shipdate):asc_first, (p_container):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_anything_l_shipdate': max_anything_l_shipdate, 'p_container': p_container}, orderings=[(max_anything_l_shipdate):asc_first, (p_container):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric, 
columns={'max_anything_l_shipdate': max_anything_l_shipdate, 'p_container': p_container}) - AGGREGATE(keys={'p_container': p_container}, aggregations={'max_anything_l_shipdate': MAX(anything_l_shipdate), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.p_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'anything_l_shipdate': t1.anything_l_shipdate, 'n_rows': t1.n_rows, 'p_container': t0.p_container}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) - AGGREGATE(keys={'p_partkey': p_partkey}, aggregations={'anything_l_shipdate': ANYTHING(l_shipdate), 'n_rows': COUNT()}) - FILTER(condition=RANKING(args=[], partition=[p_container], order=[(l_extendedprice):desc_first, (l_shipdate):asc_last]) == 1:numeric, columns={'l_shipdate': l_shipdate, 'p_partkey': p_partkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_shipdate': t1.l_shipdate, 'p_container': t0.p_container, 'p_partkey': t0.p_partkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_tax': l_tax}) +ROOT(columns=[('container', p_container), ('highest_price_ship_date', max_anything_l_shipdate)], orderings=[(max_anything_l_shipdate):asc_first, (p_container):asc_first], limit=5:numeric) + FILTER(condition=sum_n_rows > 0:numeric, 
columns={'max_anything_l_shipdate': max_anything_l_shipdate, 'p_container': p_container}) + AGGREGATE(keys={'p_container': p_container}, aggregations={'max_anything_l_shipdate': MAX(anything_l_shipdate), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.p_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'anything_l_shipdate': t1.anything_l_shipdate, 'n_rows': t1.n_rows, 'p_container': t0.p_container}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) + AGGREGATE(keys={'p_partkey': p_partkey}, aggregations={'anything_l_shipdate': ANYTHING(l_shipdate), 'n_rows': COUNT()}) + FILTER(condition=RANKING(args=[], partition=[p_container], order=[(l_extendedprice):desc_first, (l_shipdate):asc_last]) == 1:numeric, columns={'l_shipdate': l_shipdate, 'p_partkey': p_partkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_shipdate': t1.l_shipdate, 'p_container': t0.p_container, 'p_partkey': t0.p_partkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/singular6.txt b/tests/test_plan_refsols/singular6.txt index 48fc18c62..c118e82c9 100644 --- a/tests/test_plan_refsols/singular6.txt +++ b/tests/test_plan_refsols/singular6.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('name', c_name), 
('receipt_date', l_receiptdate), ('nation_name', n_name)], orderings=[(l_receiptdate):asc_first, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'l_receiptdate': l_receiptdate, 'n_name': n_name}, orderings=[(l_receiptdate):asc_first, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) - FILTER(condition=c_nationkey == 4:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'o_custkey': t0.o_custkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_receiptdate):asc_last, (l_extendedprice * 1:numeric - l_discount):desc_first]) == 1:numeric, columns={'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_receiptdate': t1.l_receiptdate, 'l_suppkey': t1.l_suppkey, 'o_custkey': t0.o_custkey}) - FILTER(condition=o_clerk == 'Clerk#000000017':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) - 
SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('name', c_name), ('receipt_date', l_receiptdate), ('nation_name', n_name)], orderings=[(l_receiptdate):asc_first, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) + FILTER(condition=c_nationkey == 4:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'o_custkey': t0.o_custkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_receiptdate):asc_last, (l_extendedprice * 1:numeric - l_discount):desc_first]) == 1:numeric, columns={'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey, 'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_receiptdate': t1.l_receiptdate, 'l_suppkey': t1.l_suppkey, 'o_custkey': t0.o_custkey}) + FILTER(condition=o_clerk == 'Clerk#000000017':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': 
l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index bb1b832a9..f03df0318 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,15 +1,14 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_orders': n_orders, 'p_name': p_name, 's_name': s_name}, orderings=[(n_orders):desc_last, (s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) - FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - 
SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) +ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first], limit=5:numeric) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) + FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, 
aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_decode3.txt b/tests/test_plan_refsols/sqlite_udf_decode3.txt index f213966cc..1d28d62e6 100644 --- a/tests/test_plan_refsols/sqlite_udf_decode3.txt +++ b/tests/test_plan_refsols/sqlite_udf_decode3.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('key', o_orderkey), ('val', DECODE3(INTEGER(SLICE(o_orderpriority, None:unknown, 1:numeric, None:unknown)), 1:numeric, 'A':string, 2:numeric, 'B':string, 3:numeric, 'C':string, 'D':string))], orderings=[(o_orderkey):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}, orderings=[(o_orderkey):asc_first]) - FILTER(condition=o_clerk == 'Clerk#000000951':string, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('key', o_orderkey), ('val', DECODE3(INTEGER(SLICE(o_orderpriority, None:unknown, 1:numeric, None:unknown)), 1:numeric, 'A':string, 2:numeric, 'B':string, 3:numeric, 'C':string, 'D':string))], orderings=[(o_orderkey):asc_first], limit=10:numeric) + FILTER(condition=o_clerk == 'Clerk#000000951':string, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/sqlite_udf_format_datetime.txt b/tests/test_plan_refsols/sqlite_udf_format_datetime.txt index 1a89dd509..fb580b235 100644 --- a/tests/test_plan_refsols/sqlite_udf_format_datetime.txt +++ b/tests/test_plan_refsols/sqlite_udf_format_datetime.txt @@ -1,3 +1,2 @@ 
-ROOT(columns=[('key', o_orderkey), ('d1', FORMAT_DATETIME('%d/%m/%Y':string, o_orderdate)), ('d2', FORMAT_DATETIME('%Y:%j':string, o_orderdate)), ('d3', INTEGER(FORMAT_DATETIME('%s':string, o_orderdate))), ('d4', INTEGER(FORMAT_DATETIME_VARIADIC('%Y%m%d':string, o_orderdate, '+39 days':string, 'start of month':string)))], orderings=[(o_totalprice):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):asc_first]) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) +ROOT(columns=[('key', o_orderkey), ('d1', FORMAT_DATETIME('%d/%m/%Y':string, o_orderdate)), ('d2', FORMAT_DATETIME('%Y:%j':string, o_orderdate)), ('d3', INTEGER(FORMAT_DATETIME('%s':string, o_orderdate))), ('d4', INTEGER(FORMAT_DATETIME_VARIADIC('%Y%m%d':string, o_orderdate, '+39 days':string, 'start of month':string)))], orderings=[(o_totalprice):asc_first], limit=5:numeric) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 54d45c7b6..5dbeb3b2f 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -1,16 +1,15 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', quantity), ('n_shipments', n_rows)], orderings=[(quantity):desc_last, (s_name):asc_first]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'n_rows': n_rows, 'p_name': p_name, 'quantity': quantity, 's_name': s_name}, orderings=[(quantity):desc_last, (s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 'quantity': t1.quantity, 's_name': t0.s_name}) - 
JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) - PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', quantity), ('n_shipments', n_rows)], orderings=[(quantity):desc_last, 
(s_name):asc_first], limit=3:numeric) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 'quantity': t1.quantity, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) + PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + 
SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index 6a203c579..9c00e03aa 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,17 +1,15 @@ -ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', national_qty_pct)], orderings=[(national_qty_pct):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'national_qty_pct': national_qty_pct, 's_name': s_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(national_qty_pct):desc_last]) - PROJECT(columns={'n_name': n_name, 'national_qty_pct': 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]), 's_name': s_name, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=s_acctbal >= 0.0:numeric & CONTAINS(s_comment, 'careful':string), columns={'s_name': s_name, 
's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=YEAR(l_shipdate) == 1995:numeric & l_shipmode == 'SHIP':string, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - FILTER(condition=CONTAINS(p_name, 'tomato':string) & STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]))], orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[])):desc_last], limit=5:numeric) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=s_acctbal >= 0.0:numeric & CONTAINS(s_comment, 'careful':string), columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=YEAR(l_shipdate) == 1995:numeric & l_shipmode == 'SHIP':string, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + FILTER(condition=CONTAINS(p_name, 'tomato':string) & STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/suppliers_bal_diffs.txt b/tests/test_plan_refsols/suppliers_bal_diffs.txt index e95879b55..998e0dde9 100644 --- a/tests/test_plan_refsols/suppliers_bal_diffs.txt +++ b/tests/test_plan_refsols/suppliers_bal_diffs.txt @@ -1,8 +1,6 @@ -ROOT(columns=[('name', s_name), ('region_name', r_name), ('acctbal_delta', acctbal_delta)], orderings=[(acctbal_delta):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'acctbal_delta': acctbal_delta, 'r_name': r_name, 's_name': s_name}, 
orderings=[(acctbal_delta):desc_last]) - PROJECT(columns={'acctbal_delta': s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]), 'r_name': r_name, 's_name': s_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey}) +ROOT(columns=[('name', s_name), ('region_name', r_name), ('acctbal_delta', s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]))], orderings=[(s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last])):desc_last], limit=5:numeric) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index 
d1fba65bb..995700439 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ir)], orderings=[(ir):desc_last, (pr_name):asc_first, (co_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name, 'ir': ir, 'pr_name': pr_name}, orderings=[(ir):desc_last, (pr_name):asc_first, (co_name):asc_first]) - PROJECT(columns={'co_name': co_name, 'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_name': pr_name}) - AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'co_name': co_name, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_name': pr_name}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_device_id': t0.in_device_id}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': 
in_error_id}) - FILTER(condition=er_name == 'Battery Failure':string, columns={'er_id': er_id}) - SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) +ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) + AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) + PROJECT(columns={'co_name': co_name, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_name': pr_name}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_device_id': t0.in_device_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) + FILTER(condition=er_name == 'Battery Failure':string, columns={'er_id': er_id}) + SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) diff --git 
a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index a380dffc1..431996dec 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('factory_country', co_name), ('purchase_country', purchase_country), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name, 'ir': ir, 'purchase_country': purchase_country}, orderings=[(ir):desc_last]) - PROJECT(columns={'co_name': co_name, 'ir': ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'purchase_country': name_2}) - JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, 
columns={'co_id': co_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id, 'de_purchase_country_id': de_purchase_country_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) +ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) + JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id, 
'de_purchase_country_id': de_purchase_country_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_hot_purchase_window.txt b/tests/test_plan_refsols/technograph_hot_purchase_window.txt index 9d2538e42..52172962b 100644 --- a/tests/test_plan_refsols/technograph_hot_purchase_window.txt +++ b/tests/test_plan_refsols/technograph_hot_purchase_window.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('start_of_period', start_of_period), ('n_purchases', n_purchases)], orderings=[(n_purchases):desc_last, (start_of_period):asc_first]) - LIMIT(limit=Literal(value=1, type=NumericType()), columns={'n_purchases': n_purchases, 'start_of_period': start_of_period}, orderings=[(n_purchases):desc_last, (start_of_period):asc_first]) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_purchases': COUNT(), 'start_of_period': ANYTHING(ca_dt)}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - FILTER(condition=calendar_day_1 < DATETIME(ca_dt, '+5 days':string) & calendar_day_1 >= ca_dt, columns={'ca_dt': ca_dt, 'calendar_day_1': calendar_day_1}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=YEAR(ca_dt) == 2024:numeric, columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) +ROOT(columns=[('start_of_period', start_of_period), ('n_purchases', n_purchases)], orderings=[(n_purchases):desc_last, (start_of_period):asc_first], limit=1:numeric) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_purchases': COUNT(), 'start_of_period': ANYTHING(ca_dt)}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + 
FILTER(condition=calendar_day_1 < DATETIME(ca_dt, '+5 days':string) & calendar_day_1 >= ca_dt, columns={'ca_dt': ca_dt, 'calendar_day_1': calendar_day_1}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + FILTER(condition=YEAR(ca_dt) == 2024:numeric, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index df4147ff6..46de1e87a 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,13 +1,11 @@ -ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}, orderings=[(ir):desc_last]) - PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}) - JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) - SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'de_product_id': de_product_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.de_id == 
t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'de_product_id': t0.de_product_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) +ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) + JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) + SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) + PROJECT(columns={'de_product_id': de_product_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'de_product_id': t0.de_product_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': 
in_device_id}) diff --git a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt index 404152d0e..b24f78d73 100644 --- a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt +++ b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('name', n_name), ('total_bal', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(ordering_0):asc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'ordering_0': ordering_0, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(ordering_0):asc_last]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) +ROOT(columns=[('name', n_name), ('total_bal', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(ordering_0):asc_last], limit=5:numeric) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt index 9efe788ce..7d8c385c4 100644 --- a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt +++ 
b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ordering_0': ordering_0}, orderings=[(ordering_0):asc_last]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) +ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last], limit=5:numeric) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/top_customers_by_orders.txt b/tests/test_plan_refsols/top_customers_by_orders.txt index a9e71b355..f2e888a70 100644 --- a/tests/test_plan_refsols/top_customers_by_orders.txt +++ b/tests/test_plan_refsols/top_customers_by_orders.txt @@ -1,7 
+1,5 @@ -ROOT(columns=[('customer_key', c_custkey), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (c_custkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(n_orders):desc_last, (c_custkey):asc_first]) - PROJECT(columns={'c_custkey': c_custkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('customer_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_custkey):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/topk_order_by.txt b/tests/test_plan_refsols/topk_order_by.txt index b0a7cf44a..140c40392 100644 --- a/tests/test_plan_refsols/topk_order_by.txt +++ b/tests/test_plan_refsols/topk_order_by.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):asc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(r_name):asc_last]) - SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):asc_last], 
limit=10:numeric) + SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/topk_order_by_calc.txt b/tests/test_plan_refsols/topk_order_by_calc.txt index 58dcdcb9f..23736fba9 100644 --- a/tests/test_plan_refsols/topk_order_by_calc.txt +++ b/tests/test_plan_refsols/topk_order_by_calc.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('region_name', r_name), ('name_length', LENGTH(r_name))], orderings=[(r_name):asc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_name': r_name}, orderings=[(r_name):asc_last]) - SCAN(table=tpch.REGION, columns={'r_name': r_name}) +ROOT(columns=[('region_name', r_name), ('name_length', LENGTH(r_name))], orderings=[(r_name):asc_last], limit=10:numeric) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/topk_replace_order_by.txt b/tests/test_plan_refsols/topk_replace_order_by.txt index b677480ad..093f06879 100644 --- a/tests/test_plan_refsols/topk_replace_order_by.txt +++ b/tests/test_plan_refsols/topk_replace_order_by.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):desc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(r_name):desc_first]) - SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):desc_first], limit=10:numeric) + SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/topk_root_different_order_by.txt b/tests/test_plan_refsols/topk_root_different_order_by.txt index ec47fee47..3faee21c0 100644 --- a/tests/test_plan_refsols/topk_root_different_order_by.txt +++ 
b/tests/test_plan_refsols/topk_root_different_order_by.txt @@ -1,3 +1,3 @@ ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):desc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(r_name):asc_first]) + LIMIT(limit=10:numeric, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(r_name):asc_first]) SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index f92f003ec..cc56c28d9 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,14 +1,12 @@ -ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', REVENUE), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (c_custkey):asc_first], limit=20:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': 
t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 207c69bc1..1c7b826c2 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,21 +1,20 @@ -ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), 
columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}, orderings=[(VALUE):desc_last]) - FILTER(condition=VALUE > min_market_share, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) - PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'min_market_share': DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, 'ps_partkey': ps_partkey}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) - AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) - PROJECT(columns={'metric': ps_supplycost * ps_availqty}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - PROJECT(columns={'expr_2': ps_supplycost * ps_availqty, 'ps_partkey': ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, 
columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last], limit=10:numeric) + FILTER(condition=VALUE > min_market_share, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) + PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'min_market_share': DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, 'ps_partkey': ps_partkey}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) + AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) + PROJECT(columns={'metric': ps_supplycost * ps_availqty}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(expr_2)}) + PROJECT(columns={'expr_2': ps_supplycost * ps_availqty, 'ps_partkey': ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 
'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q13.txt b/tests/test_plan_refsols/tpch_q13.txt index 04c2c6768..ee9fe24e8 100644 --- a/tests/test_plan_refsols/tpch_q13.txt +++ b/tests/test_plan_refsols/tpch_q13.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('C_COUNT', num_non_special_orders), ('CUSTDIST', CUSTDIST)], orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'CUSTDIST': CUSTDIST, 'num_non_special_orders': num_non_special_orders}, orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last]) - AGGREGATE(keys={'num_non_special_orders': num_non_special_orders}, aggregations={'CUSTDIST': COUNT()}) - PROJECT(columns={'num_non_special_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_comment': o_comment, 'o_custkey': o_custkey}) +ROOT(columns=[('C_COUNT', num_non_special_orders), ('CUSTDIST', CUSTDIST)], orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last], limit=10:numeric) + AGGREGATE(keys={'num_non_special_orders': num_non_special_orders}, aggregations={'CUSTDIST': COUNT()}) + PROJECT(columns={'num_non_special_orders': 
DEFAULT_TO(n_rows, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_comment': o_comment, 'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q16.txt b/tests/test_plan_refsols/tpch_q16.txt index fa9f4ec4d..07c0cfd97 100644 --- a/tests/test_plan_refsols/tpch_q16.txt +++ b/tests/test_plan_refsols/tpch_q16.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('P_BRAND', p_brand), ('P_TYPE', p_type), ('P_SIZE', p_size), ('SUPPLIER_COUNT', SUPPLIER_COUNT)], orderings=[(SUPPLIER_COUNT):desc_last, (p_brand):asc_first, (p_type):asc_first, (p_size):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'SUPPLIER_COUNT': SUPPLIER_COUNT, 'p_brand': p_brand, 'p_size': p_size, 'p_type': p_type}, orderings=[(SUPPLIER_COUNT):desc_last, (p_brand):asc_first, (p_type):asc_first, (p_size):asc_first]) - AGGREGATE(keys={'p_brand': p_brand, 'p_size': p_size, 'p_type': p_type}, aggregations={'SUPPLIER_COUNT': NDISTINCT(ps_suppkey)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_brand': t1.p_brand, 'p_size': t1.p_size, 'p_type': t1.p_type, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=NOT(LIKE(s_comment, '%Customer%Complaints%':string)), columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_comment': s_comment, 's_suppkey': s_suppkey}) - FILTER(condition=p_brand != 'BRAND#45':string & ISIN(p_size, 
[49, 14, 23, 45, 19, 3, 36, 9]:array[unknown]) & NOT(STARTSWITH(p_type, 'MEDIUM POLISHED%':string)), columns={'p_brand': p_brand, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) +ROOT(columns=[('P_BRAND', p_brand), ('P_TYPE', p_type), ('P_SIZE', p_size), ('SUPPLIER_COUNT', SUPPLIER_COUNT)], orderings=[(SUPPLIER_COUNT):desc_last, (p_brand):asc_first, (p_type):asc_first, (p_size):asc_first], limit=10:numeric) + AGGREGATE(keys={'p_brand': p_brand, 'p_size': p_size, 'p_type': p_type}, aggregations={'SUPPLIER_COUNT': NDISTINCT(ps_suppkey)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_brand': t1.p_brand, 'p_size': t1.p_size, 'p_type': t1.p_type, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=NOT(LIKE(s_comment, '%Customer%Complaints%':string)), columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_comment': s_comment, 's_suppkey': s_suppkey}) + FILTER(condition=p_brand != 'BRAND#45':string & ISIN(p_size, [49, 14, 23, 45, 19, 3, 36, 9]:array[unknown]) & NOT(STARTSWITH(p_type, 'MEDIUM POLISHED%':string)), columns={'p_brand': p_brand, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index 8de1daa53..ff041d5aa 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), 
('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) +ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], 
orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first], limit=10:numeric) + FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index bbad37875..135018c23 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), ('P_PARTKEY', p_partkey), ('P_MFGR', p_mfgr), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('S_COMMENT', s_comment)], orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first]) - LIMIT(limit=Literal(value=10, 
type=NumericType()), columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}, orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first]) - FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_supplycost):asc_last], allow_ties=True) == 1:numeric, columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) - FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 
's_phone': s_phone, 's_suppkey': s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), ('P_PARTKEY', p_partkey), ('P_MFGR', p_mfgr), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('S_COMMENT', s_comment)], orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first], limit=10:numeric) + FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_supplycost):asc_last], allow_ties=True) == 1:numeric, columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) + FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 
'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_phone': s_phone, 's_suppkey': s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 6f4efa2d8..fc41ee839 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -1,19 +1,18 @@ -ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'s_address': s_address, 's_name': s_name}, orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) - PROJECT(columns={'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric), 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) +ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name):asc_first], limit=10:numeric) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + 
FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) + PROJECT(columns={'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric), 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 12f513598..707fdeb6b 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,27 +1,25 @@ -ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', NUMWAIT)], orderings=[(NUMWAIT):desc_last, (s_name):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'NUMWAIT': NUMWAIT, 's_name': s_name}, orderings=[(NUMWAIT):desc_last, 
(s_name):asc_first]) - PROJECT(columns={'NUMWAIT': DEFAULT_TO(n_rows, 0:numeric), 's_name': s_name}) - JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.anything_l_linenumber == t1.l_linenumber & t0.anything_l_orderkey == t1.l_orderkey & t0.anything_o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t0.anything_l_suppkey}) - FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_linenumber': anything_l_linenumber, 'anything_l_orderkey': anything_l_orderkey, 'anything_l_suppkey': anything_l_suppkey, 'anything_o_orderkey': anything_o_orderkey}) - AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_linenumber': ANYTHING(l_linenumber), 'anything_l_orderkey': ANYTHING(l_orderkey), 'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderkey': ANYTHING(o_orderkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) - FILTER(condition=supplier_key_19 != l_suppkey, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 
'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus, 'supplier_key_19': t1.l_suppkey}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey, 'o_orderstatus': t1.o_orderstatus}) - FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - FILTER(condition=supplier_key_36 != l_suppkey, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'supplier_key_36': t1.l_suppkey}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey}) +ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (s_name):asc_first], limit=10:numeric) + JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 
's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.anything_l_linenumber == t1.l_linenumber & t0.anything_l_orderkey == t1.l_orderkey & t0.anything_o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t0.anything_l_suppkey}) + FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_linenumber': anything_l_linenumber, 'anything_l_orderkey': anything_l_orderkey, 'anything_l_suppkey': anything_l_suppkey, 'anything_o_orderkey': anything_o_orderkey}) + AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_linenumber': ANYTHING(l_linenumber), 'anything_l_orderkey': ANYTHING(l_orderkey), 'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderkey': ANYTHING(o_orderkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) + FILTER(condition=supplier_key_19 != l_suppkey, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus, 'supplier_key_19': t1.l_suppkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey, 'o_orderstatus': t1.o_orderstatus}) FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': 
l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - FILTER(condition=l_receiptdate > l_commitdate, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + FILTER(condition=supplier_key_36 != l_suppkey, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'supplier_key_36': t1.l_suppkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey}) + FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) + FILTER(condition=l_receiptdate > l_commitdate, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) diff --git 
a/tests/test_plan_refsols/tpch_q3.txt b/tests/test_plan_refsols/tpch_q3.txt index 61ed28ec5..f6c53f4e2 100644 --- a/tests/test_plan_refsols/tpch_q3.txt +++ b/tests/test_plan_refsols/tpch_q3.txt @@ -1,13 +1,11 @@ -ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', REVENUE), ('O_ORDERDATE', o_orderdate), ('O_SHIPPRIORITY', o_shippriority)], orderings=[(REVENUE):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'REVENUE': REVENUE, 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, orderings=[(REVENUE):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) - FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - FILTER(condition=c_mktsegment == 'BUILDING':string, 
columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) - FILTER(condition=l_shipdate > datetime.date(1995, 3, 15):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) +ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('O_ORDERDATE', o_orderdate), ('O_SHIPPRIORITY', o_shippriority)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first], limit=10:numeric) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(expr_1)}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) + FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 
'c_mktsegment': c_mktsegment}) + FILTER(condition=l_shipdate > datetime.date(1995, 3, 15):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q9.txt b/tests/test_plan_refsols/tpch_q9.txt index 8abc81dc2..634b9ee27 100644 --- a/tests/test_plan_refsols/tpch_q9.txt +++ b/tests/test_plan_refsols/tpch_q9.txt @@ -1,16 +1,15 @@ -ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(n_name):asc_first, (o_year):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'n_name': n_name, 'o_year': o_year, 'sum_value': sum_value}, orderings=[(n_name):asc_first, (o_year):desc_last]) - AGGREGATE(keys={'n_name': n_name, 'o_year': o_year}, aggregations={'sum_value': SUM(value)}) - PROJECT(columns={'n_name': n_name, 'o_year': YEAR(o_orderdate), 'value': l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': 
t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(n_name):asc_first, (o_year):desc_last], limit=10:numeric) + AGGREGATE(keys={'n_name': n_name, 'o_year': o_year}, aggregations={'sum_value': SUM(value)}) + PROJECT(columns={'n_name': n_name, 'o_year': YEAR(o_orderdate), 'value': l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_orderkey 
== t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsize.txt b/tests/test_plan_refsols/window_sliding_frame_relsize.txt index a367c4443..ddf6fd292 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsize.txt +++ 
b/tests/test_plan_refsols/window_sliding_frame_relsize.txt @@ -1,6 +1,4 @@ -ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w2': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w3': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w6': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w7': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)), 'w8': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))}) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) - SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) +ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w2', RELSIZE(args=[], partition=[sbTxCustId], 
order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w3', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w6', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w7', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))), ('w8', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) + JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) + SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsum.txt b/tests/test_plan_refsols/window_sliding_frame_relsum.txt index 80af8f609..5b0a9e60f 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsum.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsum.txt @@ -1,6 +1,4 @@ -ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSUM(args=[sbTxShares], partition=[], 
order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w2': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w3': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w6': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w7': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)), 'w8': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))}) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) - SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) +ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w2', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w3', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w6', 
RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w7', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))), ('w8', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) + JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) + SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/year_month_nation_orders.txt b/tests/test_plan_refsols/year_month_nation_orders.txt index 99d446335..2628b6b63 100644 --- a/tests/test_plan_refsols/year_month_nation_orders.txt +++ b/tests/test_plan_refsols/year_month_nation_orders.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('nation_name', n_name), ('order_year', order_year), ('order_month', order_month), ('n_orders', n_orders)], orderings=[(n_orders):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_orders': n_orders, 'order_month': order_month, 'order_year': order_year}, orderings=[(n_orders):desc_last]) - AGGREGATE(keys={'n_name': n_name, 'order_month': order_month, 'order_year': order_year}, aggregations={'n_orders': COUNT()}) - PROJECT(columns={'n_name': n_name, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': 
t0.n_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) - FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('nation_name', n_name), ('order_year', order_year), ('order_month', order_month), ('n_orders', n_orders)], orderings=[(n_orders):desc_last], limit=5:numeric) + AGGREGATE(keys={'n_name': n_name, 'order_month': order_month, 'order_year': order_year}, aggregations={'n_orders': COUNT()}) + PROJECT(columns={'n_name': n_name, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) + FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/years_months_days_hours_datediff.txt b/tests/test_plan_refsols/years_months_days_hours_datediff.txt index 6a736c481..ca2da3124 100644 --- a/tests/test_plan_refsols/years_months_days_hours_datediff.txt +++ b/tests/test_plan_refsols/years_months_days_hours_datediff.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('x', sbTxDateTime), ('y1', datetime.datetime(2025, 5, 2, 11, 0):datetime), ('years_diff', years_diff), ('c_years_diff', DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_y_diff', DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('y_diff', DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('months_diff', DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_months_diff', DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('mm_diff', DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('days_diff', DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_days_diff', DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_d_diff', DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('d_diff', DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('hours_diff', DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_hours_diff', DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_h_diff', DATEDIFF('H':string, sbTxDateTime, 
datetime.datetime(2025, 5, 2, 11, 0):datetime))], orderings=[(years_diff):asc_first]) - LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'years_diff': years_diff}, orderings=[(years_diff):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'years_diff': DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)}) - FILTER(condition=YEAR(sbTxDateTime) < 2025:numeric, columns={'sbTxDateTime': sbTxDateTime}) - SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) +ROOT(columns=[('x', sbTxDateTime), ('y1', datetime.datetime(2025, 5, 2, 11, 0):datetime), ('years_diff', DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_years_diff', DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_y_diff', DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('y_diff', DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('months_diff', DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_months_diff', DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('mm_diff', DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('days_diff', DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_days_diff', DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_d_diff', DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('d_diff', DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('hours_diff', DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_hours_diff', DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_h_diff', DATEDIFF('H':string, 
sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime))], orderings=[(DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)):asc_first], limit=30:numeric) + FILTER(condition=YEAR(sbTxDateTime) < 2025:numeric, columns={'sbTxDateTime': sbTxDateTime}) + SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git a/tests/test_sql_refsols/datediff_ansi.sql b/tests/test_sql_refsols/datediff_ansi.sql index 7b2fcb32f..1fdbab326 100644 --- a/tests/test_sql_refsols/datediff_ansi.sql +++ b/tests/test_sql_refsols/datediff_ansi.sql @@ -1,24 +1,16 @@ -WITH _t0 AS ( - SELECT - sbtxdatetime, - DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff - FROM main.sbtransaction - WHERE - EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) < 2025 - ORDER BY - years_diff - LIMIT 30 -) SELECT sbtxdatetime AS x, CAST('2025-05-02 11:00:00' AS TIMESTAMP) AS y1, CAST('2023-04-03 13:16:30' AS TIMESTAMP) AS y, - years_diff, + DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), MONTH) AS months_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), DAY) AS days_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), HOUR) AS hours_diff, DATEDIFF(CAST('2023-04-03 13:16:30' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), MINUTE) AS minutes_diff, DATEDIFF(CAST('2023-04-03 13:16:30' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), SECOND) AS seconds_diff -FROM _t0 +FROM main.sbtransaction +WHERE + EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) < 2025 ORDER BY - years_diff + DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) +LIMIT 30 diff --git a/tests/test_sql_refsols/datediff_sqlite.sql b/tests/test_sql_refsols/datediff_sqlite.sql index 4db97a7d2..967f26673 
100644 --- a/tests/test_sql_refsols/datediff_sqlite.sql +++ b/tests/test_sql_refsols/datediff_sqlite.sql @@ -1,19 +1,8 @@ -WITH _t0 AS ( - SELECT - sbtxdatetime, - CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff - FROM main.sbtransaction - WHERE - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) < 2025 - ORDER BY - years_diff - LIMIT 30 -) SELECT sbtxdatetime AS x, '2025-05-02 11:00:00' AS y1, '2023-04-03 13:16:30' AS y, - years_diff, + CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff, ( CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) ) * 12 + CAST(STRFTIME('%m', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%m', sbtxdatetime) AS INTEGER) AS months_diff, @@ -35,6 +24,9 @@ SELECT ) AS INTEGER) * 24 + CAST(STRFTIME('%H', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%H', sbtxdatetime) AS INTEGER) ) * 60 + CAST(STRFTIME('%M', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%M', sbtxdatetime) AS INTEGER) ) * 60 + CAST(STRFTIME('%S', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%S', sbtxdatetime) AS INTEGER) AS seconds_diff -FROM _t0 +FROM main.sbtransaction +WHERE + CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) < 2025 ORDER BY - years_diff + CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) +LIMIT 30 diff --git a/tests/test_sql_refsols/defog_broker_adv10_ansi.sql b/tests/test_sql_refsols/defog_broker_adv10_ansi.sql index 9ff77a9fc..4662c008b 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_ansi.sql @@ -18,5 +18,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s3 AS _s3 ON _s3.sbcustid = sbcustomer.sbcustid ORDER BY - num_transactions DESC + COALESCE(_s3.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql 
b/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql index b44a2cd0c..044bef2b7 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql @@ -18,5 +18,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s3 AS _s3 ON _s3.sbcustid = sbcustomer.sbcustid ORDER BY - num_transactions DESC + COALESCE(_s3.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv1_ansi.sql b/tests/test_sql_refsols/defog_broker_adv1_ansi.sql index d246ee325..78aa9bb78 100644 --- a/tests/test_sql_refsols/defog_broker_adv1_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv1_ansi.sql @@ -13,5 +13,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 ON _s1.sbtxcustid = sbcustomer.sbcustid ORDER BY - total_amount DESC + COALESCE(_s1.sum_sbtxamount, 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_adv1_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv1_sqlite.sql index d246ee325..78aa9bb78 100644 --- a/tests/test_sql_refsols/defog_broker_adv1_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv1_sqlite.sql @@ -13,5 +13,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 ON _s1.sbtxcustid = sbcustomer.sbcustid ORDER BY - total_amount DESC + COALESCE(_s1.sum_sbtxamount, 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_adv2_ansi.sql b/tests/test_sql_refsols/defog_broker_adv2_ansi.sql index e8a256981..1e8577455 100644 --- a/tests/test_sql_refsols/defog_broker_adv2_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv2_ansi.sql @@ -16,5 +16,5 @@ FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - tx_count DESC + COALESCE(_s1.n_rows, 0) DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_broker_adv2_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv2_sqlite.sql index 64bc9b567..d3563cf71 100644 --- a/tests/test_sql_refsols/defog_broker_adv2_sqlite.sql +++ 
b/tests/test_sql_refsols/defog_broker_adv2_sqlite.sql @@ -16,5 +16,5 @@ FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - tx_count DESC + COALESCE(_s1.n_rows, 0) DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_broker_adv4_ansi.sql b/tests/test_sql_refsols/defog_broker_adv4_ansi.sql index 912d5618c..647c42c1b 100644 --- a/tests/test_sql_refsols/defog_broker_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv4_ansi.sql @@ -16,5 +16,5 @@ FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbdptickerid = sbticker.sbtickerid ORDER BY - price_change DESC + _s1.max_sbdphigh - _s1.min_sbdplow DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_adv4_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv4_sqlite.sql index af0ff2bbd..1f79591d5 100644 --- a/tests/test_sql_refsols/defog_broker_adv4_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv4_sqlite.sql @@ -16,5 +16,5 @@ FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbdptickerid = sbticker.sbtickerid ORDER BY - price_change DESC + _s1.max_sbdphigh - _s1.min_sbdplow DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql index 81e55dbda..e419755e3 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql @@ -6,22 +6,14 @@ WITH _s1 AS ( FROM main.sbtransaction GROUP BY sbtxtickerid -), _t0 AS ( - SELECT - _s1.n_rows, - sbticker.sbtickersymbol, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount - FROM main.sbticker AS sbticker - LEFT JOIN _s1 AS _s1 - ON _s1.sbtxtickerid = sbticker.sbtickerid - ORDER BY - total_amount DESC - LIMIT 10 ) SELECT - sbtickersymbol AS symbol, - COALESCE(n_rows, 0) AS num_transactions, - total_amount -FROM _t0 + sbticker.sbtickersymbol AS symbol, + COALESCE(_s1.n_rows, 0) AS num_transactions, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount +FROM 
main.sbticker AS sbticker +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - total_amount DESC + COALESCE(_s1.sum_sbtxamount, 0) DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql index 81e55dbda..e419755e3 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql @@ -6,22 +6,14 @@ WITH _s1 AS ( FROM main.sbtransaction GROUP BY sbtxtickerid -), _t0 AS ( - SELECT - _s1.n_rows, - sbticker.sbtickersymbol, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount - FROM main.sbticker AS sbticker - LEFT JOIN _s1 AS _s1 - ON _s1.sbtxtickerid = sbticker.sbtickerid - ORDER BY - total_amount DESC - LIMIT 10 ) SELECT - sbtickersymbol AS symbol, - COALESCE(n_rows, 0) AS num_transactions, - total_amount -FROM _t0 + sbticker.sbtickersymbol AS symbol, + COALESCE(_s1.n_rows, 0) AS num_transactions, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount +FROM main.sbticker AS sbticker +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - total_amount DESC + COALESCE(_s1.sum_sbtxamount, 0) DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/defog_broker_gen4_ansi.sql b/tests/test_sql_refsols/defog_broker_gen4_ansi.sql index 2f01c1125..7ee620788 100644 --- a/tests/test_sql_refsols/defog_broker_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_gen4_ansi.sql @@ -18,5 +18,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 ON _s1.sbtxcustid = sbcustomer.sbcustid ORDER BY - num_tx DESC + COALESCE(_s1.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_gen4_sqlite.sql b/tests/test_sql_refsols/defog_broker_gen4_sqlite.sql index 1c616a29e..97950bc5b 100644 --- a/tests/test_sql_refsols/defog_broker_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_gen4_sqlite.sql @@ -16,5 +16,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 ON _s1.sbtxcustid = 
sbcustomer.sbcustid ORDER BY - num_tx DESC + COALESCE(_s1.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv16_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv16_ansi.sql index db538f89d..4e7ffe8fa 100644 --- a/tests/test_sql_refsols/defog_dealership_adv16_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv16_ansi.sql @@ -15,5 +15,5 @@ FROM main.salespersons AS salespersons LEFT JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - total DESC + COALESCE(_s1.sum_sale_price, 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_adv16_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv16_sqlite.sql index db538f89d..4e7ffe8fa 100644 --- a/tests/test_sql_refsols/defog_dealership_adv16_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv16_sqlite.sql @@ -15,5 +15,5 @@ FROM main.salespersons AS salespersons LEFT JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - total DESC + COALESCE(_s1.sum_sale_price, 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql index 7bdd812ac..e29bcd389 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql @@ -8,24 +8,15 @@ WITH _s1 AS ( sale_date >= DATE_ADD(CURRENT_TIMESTAMP(), -3, 'MONTH') GROUP BY salesperson_id -), _t0 AS ( - SELECT - salespersons.first_name, - salespersons.last_name, - _s1.n_rows, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue - FROM main.salespersons AS salespersons - LEFT JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id - ORDER BY - total_revenue DESC - LIMIT 3 ) SELECT - first_name, - last_name, - COALESCE(n_rows, 0) AS total_sales, - total_revenue -FROM _t0 + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS 
salespersons +LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - total_revenue DESC + COALESCE(_s1.sum_sale_price, 0) DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql index 280dd33d8..6c9b4e48c 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql @@ -8,24 +8,15 @@ WITH _s1 AS ( sale_date >= DATETIME('now', '-3 month') GROUP BY salesperson_id -), _t0 AS ( - SELECT - salespersons.first_name, - salespersons.last_name, - _s1.n_rows, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue - FROM main.salespersons AS salespersons - LEFT JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id - ORDER BY - total_revenue DESC - LIMIT 3 ) SELECT - first_name, - last_name, - COALESCE(n_rows, 0) AS total_sales, - total_revenue -FROM _t0 + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS salespersons +LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - total_revenue DESC + COALESCE(_s1.sum_sale_price, 0) DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql index 3134aaf0d..4239e292e 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql @@ -8,24 +8,15 @@ WITH _s1 AS ( DATEDIFF(CURRENT_TIMESTAMP(), CAST(sale_date AS DATETIME), DAY) <= 30 GROUP BY salesperson_id -), _t0 AS ( - SELECT - salespersons.first_name, - salespersons.last_name, - _s1.n_rows, - _s1.sum_sale_price - FROM main.salespersons AS salespersons - JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id - ORDER BY - n_rows DESC - LIMIT 5 ) SELECT - first_name, - last_name, - n_rows AS total_sales, - 
COALESCE(sum_sale_price, 0) AS total_revenue -FROM _t0 + salespersons.first_name, + salespersons.last_name, + _s1.n_rows AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - n_rows DESC + _s1.n_rows DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql index 4a046fb86..796cac46a 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql @@ -10,24 +10,15 @@ WITH _s1 AS ( ) AS INTEGER) <= 30 GROUP BY salesperson_id -), _t0 AS ( - SELECT - salespersons.first_name, - salespersons.last_name, - _s1.n_rows, - _s1.sum_sale_price - FROM main.salespersons AS salespersons - JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id - ORDER BY - n_rows DESC - LIMIT 5 ) SELECT - first_name, - last_name, - n_rows AS total_sales, - COALESCE(sum_sale_price, 0) AS total_revenue -FROM _t0 + salespersons.first_name, + salespersons.last_name, + _s1.n_rows AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - n_rows DESC + _s1.n_rows DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic6_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic6_ansi.sql index b5c2b3306..92893c9eb 100644 --- a/tests/test_sql_refsols/defog_dealership_basic6_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic6_ansi.sql @@ -8,5 +8,5 @@ JOIN main.customers AS customers GROUP BY customers.state ORDER BY - total_revenue DESC + COALESCE(SUM(sales.sale_price), 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic6_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic6_sqlite.sql index b5c2b3306..92893c9eb 100644 --- 
a/tests/test_sql_refsols/defog_dealership_basic6_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic6_sqlite.sql @@ -8,5 +8,5 @@ JOIN main.customers AS customers GROUP BY customers.state ORDER BY - total_revenue DESC + COALESCE(SUM(sales.sale_price), 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic7_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic7_ansi.sql index 1e9eee3e3..0326e6c44 100644 --- a/tests/test_sql_refsols/defog_dealership_basic7_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic7_ansi.sql @@ -6,5 +6,5 @@ FROM main.payments_received GROUP BY payment_method ORDER BY - total_amount DESC + COALESCE(SUM(payment_amount), 0) DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic7_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic7_sqlite.sql index 1e9eee3e3..0326e6c44 100644 --- a/tests/test_sql_refsols/defog_dealership_basic7_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic7_sqlite.sql @@ -6,5 +6,5 @@ FROM main.payments_received GROUP BY payment_method ORDER BY - total_amount DESC + COALESCE(SUM(payment_amount), 0) DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql index fe9f62139..ada0973e0 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql @@ -6,24 +6,15 @@ WITH _s1 AS ( FROM main.sales GROUP BY car_id -), _t0 AS ( - SELECT - cars.make, - cars.model, - _s1.n_rows, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - ORDER BY - total_revenue DESC - LIMIT 5 ) SELECT - make, - model, - COALESCE(n_rows, 0) AS total_sales, - total_revenue -FROM _t0 + cars.make, + cars.model, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = 
cars._id ORDER BY - total_revenue DESC + COALESCE(_s1.sum_sale_price, 0) DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql index fe9f62139..ada0973e0 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql @@ -6,24 +6,15 @@ WITH _s1 AS ( FROM main.sales GROUP BY car_id -), _t0 AS ( - SELECT - cars.make, - cars.model, - _s1.n_rows, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - ORDER BY - total_revenue DESC - LIMIT 5 ) SELECT - make, - model, - COALESCE(n_rows, 0) AS total_sales, - total_revenue -FROM _t0 + cars.make, + cars.model, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id ORDER BY - total_revenue DESC + COALESCE(_s1.sum_sale_price, 0) DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_gen1_ansi.sql b/tests/test_sql_refsols/defog_dealership_gen1_ansi.sql index b52fa5282..6c664437a 100644 --- a/tests/test_sql_refsols/defog_dealership_gen1_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_gen1_ansi.sql @@ -7,5 +7,5 @@ FROM main.salespersons WHERE NOT termination_date IS NULL ORDER BY - days_employed + DATEDIFF(CAST(termination_date AS DATETIME), CAST(hire_date AS DATETIME), DAY) * 1.0 LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_gen1_sqlite.sql b/tests/test_sql_refsols/defog_dealership_gen1_sqlite.sql index e76006a10..2e5c523e7 100644 --- a/tests/test_sql_refsols/defog_dealership_gen1_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_gen1_sqlite.sql @@ -9,5 +9,7 @@ FROM main.salespersons WHERE NOT termination_date IS NULL ORDER BY - days_employed + CAST(( + JULIANDAY(DATE(termination_date, 'start of day')) - JULIANDAY(DATE(hire_date, 'start of day')) + 
) AS INTEGER) * 1.0 LIMIT 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv15_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv15_ansi.sql index 64d4b7ee5..913d74c46 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv15_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv15_ansi.sql @@ -17,5 +17,5 @@ FROM main.merchants AS merchants LEFT JOIN _s3 AS _s3 ON _s3.merchant_id = merchants.mid ORDER BY - coupons_per_merchant DESC + COALESCE(_s3.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv15_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv15_sqlite.sql index 44196293b..1770ea1c5 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv15_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv15_sqlite.sql @@ -21,5 +21,5 @@ FROM main.merchants AS merchants LEFT JOIN _s3 AS _s3 ON _s3.merchant_id = merchants.mid ORDER BY - coupons_per_merchant DESC + COALESCE(_s3.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 5e675ed08..d3c2d33ec 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -9,22 +9,14 @@ WITH _s1 AS ( AND receiver_type = 1 GROUP BY receiver_id -), _t0 AS ( - SELECT - _s1.n_rows, - merchants.name, - COALESCE(_s1.sum_amount, 0) AS total_amount - FROM main.merchants AS merchants - LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid - ORDER BY - total_amount DESC - LIMIT 2 ) SELECT - name AS merchant_name, - COALESCE(n_rows, 0) AS total_transactions, - total_amount -FROM _t0 + merchants.name AS merchant_name, + COALESCE(_s1.n_rows, 0) AS total_transactions, + COALESCE(_s1.sum_amount, 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid ORDER BY - total_amount DESC + COALESCE(_s1.sum_amount, 0) DESC +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql 
b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index cd313570f..f50965205 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -9,22 +9,14 @@ WITH _s1 AS ( AND receiver_type = 1 GROUP BY receiver_id -), _t0 AS ( - SELECT - _s1.n_rows, - merchants.name, - COALESCE(_s1.sum_amount, 0) AS total_amount - FROM main.merchants AS merchants - LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid - ORDER BY - total_amount DESC - LIMIT 2 ) SELECT - name AS merchant_name, - COALESCE(n_rows, 0) AS total_transactions, - total_amount -FROM _t0 + merchants.name AS merchant_name, + COALESCE(_s1.n_rows, 0) AS total_transactions, + COALESCE(_s1.sum_amount, 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid ORDER BY - total_amount DESC + COALESCE(_s1.sum_amount, 0) DESC +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index 313c5c222..ffd7b6130 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -6,22 +6,14 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily GROUP BY coupon_id -), _t0 AS ( - SELECT - coupons.code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - _s1.sum_amount - FROM main.coupons AS coupons - LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid - ORDER BY - redemption_count DESC - LIMIT 3 ) SELECT - code AS coupon_code, - redemption_count, - COALESCE(sum_amount, 0) AS total_discount -FROM _t0 + coupons.code AS coupon_code, + COALESCE(_s1.count_txid, 0) AS redemption_count, + COALESCE(_s1.sum_amount, 0) AS total_discount +FROM main.coupons AS coupons +LEFT JOIN _s1 AS _s1 + ON _s1.coupon_id = coupons.cid ORDER BY - redemption_count DESC + COALESCE(_s1.count_txid, 0) DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql 
b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index 313c5c222..ffd7b6130 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -6,22 +6,14 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily GROUP BY coupon_id -), _t0 AS ( - SELECT - coupons.code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - _s1.sum_amount - FROM main.coupons AS coupons - LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid - ORDER BY - redemption_count DESC - LIMIT 3 ) SELECT - code AS coupon_code, - redemption_count, - COALESCE(sum_amount, 0) AS total_discount -FROM _t0 + coupons.code AS coupon_code, + COALESCE(_s1.count_txid, 0) AS redemption_count, + COALESCE(_s1.sum_amount, 0) AS total_discount +FROM main.coupons AS coupons +LEFT JOIN _s1 AS _s1 + ON _s1.coupon_id = coupons.cid ORDER BY - redemption_count DESC + COALESCE(_s1.count_txid, 0) DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql index d10bc2ac9..32cdfea28 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql @@ -16,5 +16,5 @@ WHERE GROUP BY _s1.country ORDER BY - total_amount DESC + COALESCE(SUM(wallet_transactions_daily.amount), 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql index d10bc2ac9..32cdfea28 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql @@ -16,5 +16,5 @@ WHERE GROUP BY _s1.country ORDER BY - total_amount DESC + COALESCE(SUM(wallet_transactions_daily.amount), 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql index 5d9adaf39..f9e924c2b 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql 
+++ b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql @@ -3,40 +3,30 @@ WITH _s2 AS ( ev_dt, ev_key FROM events -), _t0 AS ( - SELECT - eras.er_name, - events.ev_dt, - events.ev_name, - seasons.s_name, - times.t_name - FROM events AS events - JOIN eras AS eras - ON eras.er_end_year > EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) - AND eras.er_start_year <= EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) - JOIN _s2 AS _s2 - ON _s2.ev_key = events.ev_key - JOIN seasons AS seasons - ON seasons.s_month1 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) - OR seasons.s_month2 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) - OR seasons.s_month3 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) - JOIN _s2 AS _s6 - ON _s6.ev_key = events.ev_key - JOIN times AS times - ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) - AND times.t_start_hour <= EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) - WHERE - events.ev_typ = 'culture' - ORDER BY - ev_dt - LIMIT 6 ) SELECT - ev_name AS event_name, - er_name AS era_name, - EXTRACT(YEAR FROM CAST(ev_dt AS DATETIME)) AS event_year, - s_name AS season_name, - t_name AS tod -FROM _t0 + events.ev_name AS event_name, + eras.er_name AS era_name, + EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) AS event_year, + seasons.s_name AS season_name, + times.t_name AS tod +FROM events AS events +JOIN eras AS eras + ON eras.er_end_year > EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) + AND eras.er_start_year <= EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) +JOIN _s2 AS _s2 + ON _s2.ev_key = events.ev_key +JOIN seasons AS seasons + ON seasons.s_month1 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) + OR seasons.s_month2 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) + OR seasons.s_month3 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) +JOIN _s2 AS _s6 + ON _s6.ev_key = events.ev_key +JOIN times AS times + ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) + AND times.t_start_hour 
<= EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) +WHERE + events.ev_typ = 'culture' ORDER BY - ev_dt + events.ev_dt +LIMIT 6 diff --git a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql index 1cbe48ccc..25f7ff28f 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql +++ b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql @@ -3,40 +3,30 @@ WITH _s2 AS ( ev_dt, ev_key FROM events -), _t0 AS ( - SELECT - eras.er_name, - events.ev_dt, - events.ev_name, - seasons.s_name, - times.t_name - FROM events AS events - JOIN eras AS eras - ON eras.er_end_year > CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) - AND eras.er_start_year <= CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) - JOIN _s2 AS _s2 - ON _s2.ev_key = events.ev_key - JOIN seasons AS seasons - ON seasons.s_month1 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) - OR seasons.s_month2 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) - OR seasons.s_month3 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) - JOIN _s2 AS _s6 - ON _s6.ev_key = events.ev_key - JOIN times AS times - ON times.t_end_hour > CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) - AND times.t_start_hour <= CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) - WHERE - events.ev_typ = 'culture' - ORDER BY - ev_dt - LIMIT 6 ) SELECT - ev_name AS event_name, - er_name AS era_name, - CAST(STRFTIME('%Y', ev_dt) AS INTEGER) AS event_year, - s_name AS season_name, - t_name AS tod -FROM _t0 + events.ev_name AS event_name, + eras.er_name AS era_name, + CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) AS event_year, + seasons.s_name AS season_name, + times.t_name AS tod +FROM events AS events +JOIN eras AS eras + ON eras.er_end_year > CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) + AND eras.er_start_year <= CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) +JOIN _s2 AS _s2 + ON _s2.ev_key = events.ev_key +JOIN seasons AS seasons + ON seasons.s_month1 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) 
+ OR seasons.s_month2 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) + OR seasons.s_month3 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) +JOIN _s2 AS _s6 + ON _s6.ev_key = events.ev_key +JOIN times AS times + ON times.t_end_hour > CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) + AND times.t_start_hour <= CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) +WHERE + events.ev_typ = 'culture' ORDER BY - ev_dt + events.ev_dt +LIMIT 6 diff --git a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql index 30ed1ca1b..cd2c4a531 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql @@ -1,19 +1,9 @@ -WITH _t0 AS ( - SELECT - ps_availqty, - ps_partkey, - ps_suppkey, - CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost - FROM tpch.partsupp - ORDER BY - total_cost DESC - LIMIT 10 -) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, FLOOR(ps_availqty) AS complete_parts, - total_cost -FROM _t0 + CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost +FROM tpch.partsupp ORDER BY - total_cost DESC + CEIL(ps_supplycost * FLOOR(ps_availqty)) DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql index 95f227be8..6db785108 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql @@ -1,29 +1,29 @@ -WITH _t0 AS ( - SELECT - ps_availqty, - ps_partkey, - ps_suppkey, - CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) + CASE - WHEN CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) < ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) - THEN 1 - ELSE 0 - END AS total_cost - FROM 
tpch.partsupp - ORDER BY - total_cost DESC - LIMIT 10 -) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END AS complete_parts, - total_cost -FROM _t0 + CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) + CASE + WHEN CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) < ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) + THEN 1 + ELSE 0 + END AS total_cost +FROM tpch.partsupp ORDER BY - total_cost DESC + CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) + CASE + WHEN CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) < ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) + THEN 1 + ELSE 0 + END DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql index b6fe25cdb..b4a7156ba 100644 --- a/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql @@ -1,14 +1,3 @@ -WITH _t0 AS ( - SELECT - o_orderkey, - o_orderpriority - FROM tpch.orders - WHERE - o_clerk = 'Clerk#000000951' - ORDER BY - o_orderkey - LIMIT 10 -) SELECT o_orderkey AS key, CASE @@ -20,6 +9,9 @@ SELECT THEN 'C' ELSE 'D' END AS val -FROM _t0 +FROM tpch.orders +WHERE + o_clerk = 'Clerk#000000951' ORDER BY o_orderkey +LIMIT 10 diff --git a/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql 
b/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql index 7d81bf0ec..df8c90691 100644 --- a/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql @@ -1,19 +1,10 @@ -WITH _t0 AS ( - SELECT - o_orderdate, - o_orderkey, - o_totalprice - FROM tpch.orders - ORDER BY - o_totalprice - LIMIT 5 -) SELECT o_orderkey AS key, STRFTIME('%d/%m/%Y', o_orderdate) AS d1, STRFTIME('%Y:%j', o_orderdate) AS d2, CAST(STRFTIME('%s', o_orderdate) AS INTEGER) AS d3, CAST(STRFTIME('%Y%m%d', o_orderdate, '+39 days', 'start of month') AS INTEGER) AS d4 -FROM _t0 +FROM tpch.orders ORDER BY o_totalprice +LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql index da20e4054..954197d6d 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ir DESC, + ROUND(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql index 20695a77b..5462388f5 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ir DESC, + ROUND(CAST(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) AS REAL) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql index 
bea21073d..c05d32834 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql @@ -42,5 +42,7 @@ CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 ON _s0.co_id = _s9.co_id AND _s1.co_id = _s9._id_3 ORDER BY - ir DESC + ROUND(( + 1.0 * COALESCE(_s9.sum_n_rows, 0) + ) / COALESCE(_s9.n_rows, 0), 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql index e460f8540..6cc634740 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql @@ -42,5 +42,7 @@ CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 ON _s0.co_id = _s9.co_id AND _s1.co_id = _s9._id_3 ORDER BY - ir DESC + ROUND(CAST(( + 1.0 * COALESCE(_s9.sum_n_rows, 0) + ) AS REAL) / COALESCE(_s9.n_rows, 0), 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql index d4bcd9d17..276282dbe 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql @@ -27,5 +27,5 @@ FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - ir DESC + ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql index 5a99bc7fc..60fd910bb 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql @@ -27,5 +27,5 @@ FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - 
ir DESC + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/tpch_q10_ansi.sql b/tests/test_sql_refsols/tpch_q10_ansi.sql index 912de751d..f0a2699a0 100644 --- a/tests/test_sql_refsols/tpch_q10_ansi.sql +++ b/tests/test_sql_refsols/tpch_q10_ansi.sql @@ -28,6 +28,6 @@ LEFT JOIN _s3 AS _s3 JOIN tpch.nation AS nation ON customer.c_nationkey = nation.n_nationkey ORDER BY - revenue DESC, + COALESCE(_s3.sum_expr_1, 0) DESC, c_custkey LIMIT 20 diff --git a/tests/test_sql_refsols/tpch_q10_sqlite.sql b/tests/test_sql_refsols/tpch_q10_sqlite.sql index 20a1b7a34..7e5943713 100644 --- a/tests/test_sql_refsols/tpch_q10_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q10_sqlite.sql @@ -41,6 +41,6 @@ LEFT JOIN _s3 AS _s3 JOIN tpch.nation AS nation ON customer.c_nationkey = nation.n_nationkey ORDER BY - revenue DESC, + COALESCE(_s3.sum_expr_1, 0) DESC, c_custkey LIMIT 20 diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 64803c51d..67f82c489 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -16,7 +16,7 @@ WITH _s3 AS ( ON _s3.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' -), _t1 AS ( +), _t2 AS ( SELECT COUNT(*) AS n_rows, partsupp.ps_suppkey @@ -35,8 +35,8 @@ SELECT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _t1 AS _t1 - ON _t1.n_rows > 0 AND _t1.ps_suppkey = supplier.s_suppkey +JOIN _t2 AS _t2 + ON _t2.n_rows > 0 AND _t2.ps_suppkey = supplier.s_suppkey ORDER BY s_name LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index e5b221d69..c7f3110d7 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -16,7 +16,7 @@ WITH _s3 AS ( ON _s3.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' 
-), _t1 AS ( +), _t2 AS ( SELECT COUNT(*) AS n_rows, partsupp.ps_suppkey @@ -35,8 +35,8 @@ SELECT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _t1 AS _t1 - ON _t1.n_rows > 0 AND _t1.ps_suppkey = supplier.s_suppkey +JOIN _t2 AS _t2 + ON _t2.n_rows > 0 AND _t2.ps_suppkey = supplier.s_suppkey ORDER BY s_name LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index 88ec9d9f7..b57b17e13 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -1,4 +1,4 @@ -WITH _t7 AS ( +WITH _t6 AS ( SELECT l_commitdate, l_linenumber, @@ -8,47 +8,47 @@ WITH _t7 AS ( FROM tpch.lineitem WHERE l_commitdate < l_receiptdate -), _t4 AS ( +), _t3 AS ( SELECT - ANY_VALUE(_t7.l_linenumber) AS anything_l_linenumber, - ANY_VALUE(_t7.l_orderkey) AS anything_l_orderkey, - ANY_VALUE(_t7.l_suppkey) AS anything_l_suppkey, + ANY_VALUE(_t6.l_linenumber) AS anything_l_linenumber, + ANY_VALUE(_t6.l_orderkey) AS anything_l_orderkey, + ANY_VALUE(_t6.l_suppkey) AS anything_l_suppkey, ANY_VALUE(orders.o_orderkey) AS anything_o_orderkey, ANY_VALUE(orders.o_orderstatus) AS anything_o_orderstatus - FROM _t7 AS _t7 + FROM _t6 AS _t6 JOIN tpch.orders AS orders - ON _t7.l_orderkey = orders.o_orderkey + ON _t6.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t7.l_suppkey <> lineitem.l_suppkey AND lineitem.l_orderkey = orders.o_orderkey + ON _t6.l_suppkey <> lineitem.l_suppkey AND lineitem.l_orderkey = orders.o_orderkey GROUP BY - _t7.l_linenumber, - _t7.l_orderkey, + _t6.l_linenumber, + _t6.l_orderkey, orders.o_orderkey ), _s11 AS ( SELECT - _t9.l_linenumber, - _t9.l_orderkey, + _t8.l_linenumber, + _t8.l_orderkey, orders.o_orderkey - FROM _t7 AS _t9 + FROM _t6 AS _t8 JOIN tpch.orders AS orders - ON _t9.l_orderkey = orders.o_orderkey + ON _t8.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - 
ON _t9.l_suppkey <> lineitem.l_suppkey + ON _t8.l_suppkey <> lineitem.l_suppkey AND lineitem.l_commitdate < lineitem.l_receiptdate AND lineitem.l_orderkey = orders.o_orderkey ), _s13 AS ( SELECT COUNT(*) AS n_rows, - _t4.anything_l_suppkey - FROM _t4 AS _t4 + _t3.anything_l_suppkey + FROM _t3 AS _t3 JOIN _s11 AS _s11 - ON _s11.l_linenumber = _t4.anything_l_linenumber - AND _s11.l_orderkey = _t4.anything_l_orderkey - AND _s11.o_orderkey = _t4.anything_o_orderkey + ON _s11.l_linenumber = _t3.anything_l_linenumber + AND _s11.l_orderkey = _t3.anything_l_orderkey + AND _s11.o_orderkey = _t3.anything_o_orderkey WHERE - _t4.anything_o_orderstatus = 'F' + _t3.anything_o_orderstatus = 'F' GROUP BY - _t4.anything_l_suppkey + _t3.anything_l_suppkey ) SELECT supplier.s_name AS S_NAME, @@ -59,6 +59,6 @@ JOIN tpch.nation AS nation LEFT JOIN _s13 AS _s13 ON _s13.anything_l_suppkey = supplier.s_suppkey ORDER BY - numwait DESC, + COALESCE(_s13.n_rows, 0) DESC, s_name LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q21_sqlite.sql b/tests/test_sql_refsols/tpch_q21_sqlite.sql index fd7da4b57..7e0ec8787 100644 --- a/tests/test_sql_refsols/tpch_q21_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q21_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t7 AS ( +WITH _t6 AS ( SELECT l_commitdate, l_linenumber, @@ -8,51 +8,51 @@ WITH _t7 AS ( FROM tpch.lineitem WHERE l_commitdate < l_receiptdate -), _t4 AS ( +), _t3 AS ( SELECT - MAX(_t7.l_linenumber) AS anything_l_linenumber, - MAX(_t7.l_orderkey) AS anything_l_orderkey, - MAX(_t7.l_suppkey) AS anything_l_suppkey, + MAX(_t6.l_linenumber) AS anything_l_linenumber, + MAX(_t6.l_orderkey) AS anything_l_orderkey, + MAX(_t6.l_suppkey) AS anything_l_suppkey, MAX(orders.o_orderkey) AS anything_o_orderkey, MAX(orders.o_orderstatus) AS anything_o_orderstatus - FROM _t7 AS _t7 + FROM _t6 AS _t6 JOIN tpch.orders AS orders - ON _t7.l_orderkey = orders.o_orderkey + ON _t6.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t7.l_suppkey <> 
lineitem.l_suppkey AND lineitem.l_orderkey = orders.o_orderkey + ON _t6.l_suppkey <> lineitem.l_suppkey AND lineitem.l_orderkey = orders.o_orderkey GROUP BY - _t7.l_linenumber, - _t7.l_orderkey, + _t6.l_linenumber, + _t6.l_orderkey, orders.o_orderkey ), _u_0 AS ( SELECT - _t9.l_linenumber AS _u_1, - _t9.l_orderkey AS _u_2, + _t8.l_linenumber AS _u_1, + _t8.l_orderkey AS _u_2, orders.o_orderkey AS _u_3 - FROM _t7 AS _t9 + FROM _t6 AS _t8 JOIN tpch.orders AS orders - ON _t9.l_orderkey = orders.o_orderkey + ON _t8.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t9.l_suppkey <> lineitem.l_suppkey + ON _t8.l_suppkey <> lineitem.l_suppkey AND lineitem.l_commitdate < lineitem.l_receiptdate AND lineitem.l_orderkey = orders.o_orderkey GROUP BY - _t9.l_linenumber, - _t9.l_orderkey, + _t8.l_linenumber, + _t8.l_orderkey, orders.o_orderkey ), _s13 AS ( SELECT COUNT(*) AS n_rows, - _t4.anything_l_suppkey - FROM _t4 AS _t4 + _t3.anything_l_suppkey + FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 - ON _t4.anything_l_linenumber = _u_0._u_1 - AND _t4.anything_l_orderkey = _u_0._u_2 - AND _t4.anything_o_orderkey = _u_0._u_3 + ON _t3.anything_l_linenumber = _u_0._u_1 + AND _t3.anything_l_orderkey = _u_0._u_2 + AND _t3.anything_o_orderkey = _u_0._u_3 WHERE - _t4.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL + _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL GROUP BY - _t4.anything_l_suppkey + _t3.anything_l_suppkey ) SELECT supplier.s_name AS S_NAME, @@ -63,6 +63,6 @@ JOIN tpch.nation AS nation LEFT JOIN _s13 AS _s13 ON _s13.anything_l_suppkey = supplier.s_suppkey ORDER BY - numwait DESC, + COALESCE(_s13.n_rows, 0) DESC, s_name LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q2_ansi.sql b/tests/test_sql_refsols/tpch_q2_ansi.sql index 003040077..07e6da4ff 100644 --- a/tests/test_sql_refsols/tpch_q2_ansi.sql +++ b/tests/test_sql_refsols/tpch_q2_ansi.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT nation.n_name, part.p_mfgr, @@ -31,7 +31,7 @@ SELECT 
s_address AS S_ADDRESS, s_phone AS S_PHONE, s_comment AS S_COMMENT -FROM _t0 +FROM _t1 ORDER BY s_acctbal DESC, n_name, diff --git a/tests/test_sql_refsols/tpch_q3_ansi.sql b/tests/test_sql_refsols/tpch_q3_ansi.sql index f50a43d15..6511e3f13 100644 --- a/tests/test_sql_refsols/tpch_q3_ansi.sql +++ b/tests/test_sql_refsols/tpch_q3_ansi.sql @@ -18,7 +18,9 @@ GROUP BY orders.o_orderdate, orders.o_shippriority ORDER BY - revenue DESC, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) DESC, o_orderdate, l_orderkey LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q3_sqlite.sql b/tests/test_sql_refsols/tpch_q3_sqlite.sql index 7fa133e3b..d23483f4d 100644 --- a/tests/test_sql_refsols/tpch_q3_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q3_sqlite.sql @@ -17,7 +17,9 @@ GROUP BY orders.o_orderdate, orders.o_shippriority ORDER BY - revenue DESC, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) DESC, o_orderdate, l_orderkey LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q9_ansi.sql b/tests/test_sql_refsols/tpch_q9_ansi.sql index 99390991a..12fe871b0 100644 --- a/tests/test_sql_refsols/tpch_q9_ansi.sql +++ b/tests/test_sql_refsols/tpch_q9_ansi.sql @@ -1,37 +1,30 @@ -WITH _t0 AS ( - SELECT +SELECT + nation.n_name AS NATION, + EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) AS O_YEAR, + COALESCE( SUM( lineitem.l_extendedprice * ( 1 - lineitem.l_discount ) - partsupp.ps_supplycost * lineitem.l_quantity - ) AS sum_value, - nation.n_name, - EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) AS o_year - FROM tpch.lineitem AS lineitem - JOIN tpch.part AS part - ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' - JOIN tpch.supplier AS supplier - ON lineitem.l_suppkey = supplier.s_suppkey - JOIN tpch.nation AS nation - ON nation.n_nationkey = supplier.s_nationkey - JOIN tpch.orders AS orders - ON lineitem.l_orderkey = orders.o_orderkey - JOIN tpch.partsupp AS partsupp - ON lineitem.l_partkey = 
partsupp.ps_partkey - AND lineitem.l_suppkey = partsupp.ps_suppkey - GROUP BY - nation.n_name, - EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) - ORDER BY - n_name, - o_year DESC - LIMIT 10 -) -SELECT - n_name AS NATION, - o_year AS O_YEAR, - COALESCE(sum_value, 0) AS AMOUNT -FROM _t0 + ), + 0 + ) AS AMOUNT +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' +JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey +JOIN tpch.partsupp AS partsupp + ON lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey +GROUP BY + nation.n_name, + EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) ORDER BY - n_name, + nation.n_name, o_year DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q9_sqlite.sql b/tests/test_sql_refsols/tpch_q9_sqlite.sql index ac17a15d1..37e726db9 100644 --- a/tests/test_sql_refsols/tpch_q9_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q9_sqlite.sql @@ -1,37 +1,30 @@ -WITH _t0 AS ( - SELECT +SELECT + nation.n_name AS NATION, + CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) AS O_YEAR, + COALESCE( SUM( lineitem.l_extendedprice * ( 1 - lineitem.l_discount ) - partsupp.ps_supplycost * lineitem.l_quantity - ) AS sum_value, - nation.n_name, - CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) AS o_year - FROM tpch.lineitem AS lineitem - JOIN tpch.part AS part - ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' - JOIN tpch.supplier AS supplier - ON lineitem.l_suppkey = supplier.s_suppkey - JOIN tpch.nation AS nation - ON nation.n_nationkey = supplier.s_nationkey - JOIN tpch.orders AS orders - ON lineitem.l_orderkey = orders.o_orderkey - JOIN tpch.partsupp AS partsupp - ON lineitem.l_partkey = partsupp.ps_partkey - AND lineitem.l_suppkey = 
partsupp.ps_suppkey - GROUP BY - nation.n_name, - CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) - ORDER BY - n_name, - o_year DESC - LIMIT 10 -) -SELECT - n_name AS NATION, - o_year AS O_YEAR, - COALESCE(sum_value, 0) AS AMOUNT -FROM _t0 + ), + 0 + ) AS AMOUNT +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' +JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey +JOIN tpch.partsupp AS partsupp + ON lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey +GROUP BY + nation.n_name, + CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) ORDER BY - n_name, + nation.n_name, o_year DESC +LIMIT 10 From 665a9dd3d5c325d9e0d0a7533c9c40b962126a5a Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 13:23:23 -0400 Subject: [PATCH 015/143] Restoring filter modifications --- pydough/conversion/projection_pullup.py | 3 ++ tests/test_plan_refsols/bad_child_reuse_2.txt | 6 +-- tests/test_plan_refsols/bad_child_reuse_3.txt | 6 +-- tests/test_plan_refsols/common_prefix_n.txt | 6 +-- tests/test_plan_refsols/common_prefix_o.txt | 6 +-- tests/test_plan_refsols/correl_14.txt | 30 +++++------ tests/test_plan_refsols/correl_15.txt | 36 ++++++------- tests/test_plan_refsols/correl_18.txt | 17 +++--- tests/test_plan_refsols/correl_20.txt | 26 +++++---- tests/test_plan_refsols/correl_24.txt | 14 ++--- .../month_year_sliding_windows.txt | 29 +++++----- .../multi_partition_access_6.txt | 53 +++++++++---------- .../technograph_monthly_incident_rate.txt | 51 +++++++++--------- ..._year_cumulative_incident_rate_overall.txt | 6 +-- tests/test_plan_refsols/tpch_q11.txt | 4 +- tests/test_plan_refsols/tpch_q20.txt | 19 ++++--- tests/test_plan_refsols/tpch_q22.txt | 6 +-- .../window_filter_order_10.txt | 13 
+++-- ...technograph_monthly_incident_rate_ansi.sql | 34 ++++++------ ...chnograph_monthly_incident_rate_sqlite.sql | 34 ++++++------ tests/test_sql_refsols/tpch_q11_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q11_sqlite.sql | 4 +- 22 files changed, 200 insertions(+), 207 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 768944058..b126550ad 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -243,6 +243,9 @@ def pullup_projections(node: RelationalNode) -> RelationalNode: if node.join_type == JoinType.INNER: pull_project_into_join(node, 1) return pull_non_columns(node) + case Filter(): + pull_project_into_filter(node) + return pull_non_columns(node) case Limit(): pull_project_into_limit(node) return pull_non_columns(node) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 9aa529377..f9274e077 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 
'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 9aa529377..f9274e077 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 
'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index fc3a3530b..0235aa2fa 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_agg_11, 0:numeric))], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 
'sum_p_retailprice': sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 1916a60a9..808be5ed1 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': 
total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) 
FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index c5da9fde3..feffdfb1a 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,19 +1,17 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) - PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) - FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric, columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': 
SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index aa5568ea8..6d7af3f9c 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,22 +1,20 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) - PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) - FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric, columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': 
t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - 
SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index e34eb6923..5f6ca684d 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,12 +1,11 @@ ROOT(columns=[('n', DEFAULT_TO(sum_n_above_avg, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_n_above_avg': SUM(n_above_avg)}) AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_above_avg': COUNT()}) - FILTER(condition=o_totalprice >= 0.5:numeric * total_price_sum, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - PROJECT(columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'total_price_sum': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) - FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) - AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=YEAR(o_orderdate) 
== 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) + FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) + AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index 8c61c789f..a480ec3e3 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,16 +1,14 @@ ROOT(columns=[('n', n)], orderings=[]) 
AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=domestic, columns={}) - PROJECT(columns={'domestic': name_16 == n_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'name_16': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) + 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index af102dee8..1217865d6 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -4,9 +4,11 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orde JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) - FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'month': 
MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) + FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) + FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 69a863b05..d438e6e1e 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,16 +1,15 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) - FILTER(condition=month_total_spent > NEXT(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & month_total_spent > PREV(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 
'year': year}) - PROJECT(columns={'month': month, 'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) - FILTER(condition=curr_year_total_spent > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'curr_year_total_spent': DEFAULT_TO(sum_month_total_spent, 0:numeric), 'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) - PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > 
PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) + JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) + FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) + PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) + PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 05b282309..5ee7193e5 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ 
b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -2,26 +2,25 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=n_ticker_type_trans == 1:numeric | n_cust_type_trans == 1:numeric, columns={'sbTxId': sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t0.n_cust_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_cust_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) - PROJECT(columns={'n_cust_trans': DEFAULT_TO(sum_n_cust_type_trans, 0:numeric), 'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, 
columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + FILTER(condition=DEFAULT_TO(sum_n_cust_type_trans, 0:numeric) > 1:numeric, columns={'sbTxCustId': sbTxCustId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': 
sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) @@ -41,21 +40,19 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) - PROJECT(columns={'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - 
AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_ticker_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxTickerId': 
sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 6f30065aa..8aa3a87db 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,29 +1,28 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'month': month, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, 
aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'month': MONTH(ca_dt), 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows, 'year': t0.year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': 
t1.ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - JOIN(condition=t0.de_production_country_id == 
t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index 25ad10b87..4c754cbe5 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(year):asc_first]) - FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 
'n_incidents': n_incidents, 'year': year}) - PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) +ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) + FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) + PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': YEAR(ca_dt)}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 1c7b826c2..633b50afb 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last], limit=10:numeric) - FILTER(condition=VALUE > min_market_share, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) - 
PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'min_market_share': DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, 'ps_partkey': ps_partkey}) + FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) + PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey, 'sum_metric': sum_metric}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) PROJECT(columns={'metric': ps_supplycost * ps_availqty}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index fc41ee839..245e3ef0e 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -6,13 +6,12 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) - PROJECT(columns={'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric), 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), 
columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(DEFAULT_TO(sum_l_quantity, 0:numeric), 0:numeric), columns={'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index ff832eb90..60e4e77d6 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -7,10 +7,8 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) JOIN(condition=True:bool, 
type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) - FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) - PROJECT(columns={'c_acctbal': c_acctbal, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) - FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index 96e986806..28100ecde 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,9 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[NULL_1], partition=[], order=[]), columns={}) - PROJECT(columns={'NULL_1': None:unknown, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) - FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - 
SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) + FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index 3c42321e0..a80b170ef 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t4 AS ( SELECT ca_dt FROM main.calendar @@ -15,7 +15,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, _t6.ca_dt - FROM _t3 AS _t6 + FROM _t4 AS _t6 JOIN main.calendar AS calendar ON calendar.ca_dt >= DATE_ADD(CAST(_t6.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices @@ -27,25 +27,25 @@ WITH _t3 AS ( ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t10.ca_dt - FROM _t3 AS _t10 + _t9.ca_dt + FROM _t4 AS _t9 JOIN main.incidents AS incidents - ON _t10.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + ON _t9.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN 
_t7 AS _t11 - ON _t11.co_id = devices.de_production_country_id + JOIN _t7 AS _t10 + ON _t10.co_id = devices.de_production_country_id GROUP BY - _t10.ca_dt + _t9.ca_dt ) SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))), ( + WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))), ( 2 * -1 )) END @@ -53,13 +53,13 @@ SELECT ROUND(( 1000000.0 * COALESCE(SUM(_s15.n_rows), 0) ) / COALESCE(SUM(_s7.n_rows), 0), 2) AS ir -FROM _t3 AS _t3 +FROM _t4 AS _t4 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t3.ca_dt + ON _s7.ca_dt = _t4.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t3.ca_dt + ON _s15.ca_dt = _t4.ca_dt GROUP BY - EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), - EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)) + EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index 1c08c7d2e..de7f0b427 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t4 AS ( SELECT ca_dt FROM main.calendar @@ -15,7 +15,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, _t6.ca_dt - FROM _t3 AS _t6 + FROM _t4 AS _t6 JOIN main.calendar AS calendar ON calendar.ca_dt >= DATETIME(_t6.ca_dt, '-6 month') JOIN main.devices AS devices @@ -27,25 +27,25 @@ WITH _t3 AS ( ), _s15 AS ( SELECT COUNT(*) AS n_rows, - 
_t10.ca_dt - FROM _t3 AS _t10 + _t9.ca_dt + FROM _t4 AS _t9 JOIN main.incidents AS incidents - ON _t10.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') + ON _t9.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t11 - ON _t11.co_id = devices.de_production_country_id + JOIN _t7 AS _t10 + ON _t10.co_id = devices.de_production_country_id GROUP BY - _t10.ca_dt + _t9.ca_dt ) SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), ( 2 * -1 )) END @@ -56,13 +56,13 @@ SELECT ) AS REAL) / COALESCE(SUM(_s7.n_rows), 0), 2 ) AS ir -FROM _t3 AS _t3 +FROM _t4 AS _t4 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t3.ca_dt + ON _s7.ca_dt = _t4.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t3.ca_dt + ON _s15.ca_dt = _t4.ca_dt GROUP BY - CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), - CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER) + CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER) ORDER BY month diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index 53460ad26..f76f36d96 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -35,7 +35,9 @@ SELECT COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON COALESCE(_s8.sum_metric, 0) * 0.0001 < COALESCE(_s9.sum_expr_2, 0) + ON ( + COALESCE(_s8.sum_metric, 0) * 0.0001 + ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 diff --git 
a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index 53460ad26..f76f36d96 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -35,7 +35,9 @@ SELECT COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON COALESCE(_s8.sum_metric, 0) * 0.0001 < COALESCE(_s9.sum_expr_2, 0) + ON ( + COALESCE(_s8.sum_metric, 0) * 0.0001 + ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 From d1fe25bf9f281c3adc49bbffa605f5af51211862 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 13:45:44 -0400 Subject: [PATCH 016/143] Started aggregation project pullup --- pydough/conversion/agg_split.py | 2 +- pydough/conversion/column_bubbler.py | 9 ++-- pydough/conversion/projection_pullup.py | 53 +++++++++++++++++++ pydough/conversion/relational_converter.py | 2 +- .../relational/relational_nodes/aggregate.py | 16 +++--- .../agg_orders_by_year_month_basic.txt | 5 +- .../agg_orders_by_year_month_just_europe.txt | 24 ++++----- .../agg_orders_by_year_month_vs_europe.txt | 24 ++++----- tests/test_plan_refsols/agg_partition.txt | 5 +- .../aggregate_mixed_levels_simple.txt | 9 ++-- .../aggregate_on_function_call.txt | 5 +- .../aggregation_analytics_1.txt | 23 ++++---- .../aggregation_analytics_2.txt | 23 ++++---- .../aggregation_analytics_3.txt | 23 ++++---- .../test_plan_refsols/avg_acctbal_wo_debt.txt | 5 +- tests/test_plan_refsols/common_prefix_a.txt | 11 ++-- tests/test_plan_refsols/common_prefix_ag.txt | 41 +++++++------- tests/test_plan_refsols/common_prefix_ah.txt | 41 +++++++------- tests/test_plan_refsols/common_prefix_ai.txt | 41 +++++++------- tests/test_plan_refsols/common_prefix_aj.txt | 41 +++++++------- tests/test_plan_refsols/common_prefix_b.txt | 19 ++++--- tests/test_plan_refsols/common_prefix_c.txt | 32 ++++++----- tests/test_plan_refsols/common_prefix_d.txt | 35 ++++++------ tests/test_plan_refsols/common_prefix_e.txt | 11 ++-- 
tests/test_plan_refsols/common_prefix_f.txt | 19 ++++--- tests/test_plan_refsols/common_prefix_g.txt | 19 ++++--- tests/test_plan_refsols/common_prefix_h.txt | 32 ++++++----- tests/test_plan_refsols/correl_14.txt | 9 ++-- tests/test_plan_refsols/correl_15.txt | 9 ++-- tests/test_plan_refsols/correl_24.txt | 9 ++-- tests/test_plan_refsols/correl_26.txt | 27 +++++----- tests/test_plan_refsols/correl_27.txt | 25 +++++---- tests/test_plan_refsols/correl_28.txt | 21 ++++---- tests/test_plan_refsols/correl_30.txt | 29 +++++----- tests/test_plan_refsols/correl_31.txt | 31 ++++++----- .../count_cust_supplier_nation_combos.txt | 19 ++++--- .../customer_largest_order_deltas.txt | 7 ++- tests/test_plan_refsols/double_partition.txt | 5 +- .../epoch_intra_season_searches.txt | 42 +++++++-------- .../global_acctbal_breakdown.txt | 5 +- .../highest_priority_per_year.txt | 5 +- .../month_year_sliding_windows.txt | 17 +++--- .../nation_acctbal_breakdown.txt | 5 +- .../odate_and_rdate_avggap.txt | 11 ++-- .../region_acctbal_breakdown.txt | 9 ++-- .../simple_var_std_with_nulls.txt | 7 ++- .../sqlite_udf_combine_strings.txt | 19 +++---- .../sqlite_udf_covar_pop.txt | 17 +++--- tests/test_plan_refsols/sqlite_udf_nested.txt | 15 +++--- .../sqlite_udf_percent_epsilon.txt | 9 ++-- .../sqlite_udf_percent_positive.txt | 16 +++--- tests/test_plan_refsols/sqlite_udf_relmin.txt | 7 ++- ...ograph_battery_failure_rates_anomalies.txt | 25 +++++---- ..._error_rate_sun_set_by_factory_country.txt | 17 +++--- ...hnograph_incident_rate_by_release_year.txt | 22 ++++---- .../technograph_incident_rate_per_brand.txt | 15 +++--- .../technograph_monthly_incident_rate.txt | 4 +- .../technograph_most_unreliable_products.txt | 15 +++--- ...umulative_incident_rate_goldcopperstar.txt | 4 +- ..._year_cumulative_incident_rate_overall.txt | 4 +- tests/test_plan_refsols/tpch_q1.txt | 7 ++- tests/test_plan_refsols/tpch_q10.txt | 13 +++-- tests/test_plan_refsols/tpch_q11.txt | 30 +++++------ 
tests/test_plan_refsols/tpch_q12.txt | 11 ++-- tests/test_plan_refsols/tpch_q13.txt | 13 +++-- tests/test_plan_refsols/tpch_q14.txt | 11 ++-- tests/test_plan_refsols/tpch_q15.txt | 21 ++++---- tests/test_plan_refsols/tpch_q19.txt | 13 +++-- tests/test_plan_refsols/tpch_q3.txt | 19 ++++--- tests/test_plan_refsols/tpch_q5.txt | 33 ++++++------ tests/test_plan_refsols/tpch_q6.txt | 7 ++- tests/test_plan_refsols/tpch_q7.txt | 4 +- tests/test_plan_refsols/tpch_q8.txt | 39 +++++++------- tests/test_plan_refsols/tpch_q9.txt | 27 +++++----- tests/test_plan_refsols/triple_partition.txt | 47 ++++++++-------- .../year_month_nation_orders.txt | 21 ++++---- .../yoy_change_in_num_orders.txt | 5 +- .../defog_broker_adv5_ansi.sql | 2 +- .../defog_broker_adv5_sqlite.sql | 2 +- .../defog_broker_adv7_ansi.sql | 4 +- .../defog_broker_adv7_sqlite.sql | 4 +- .../defog_dealership_gen4_ansi.sql | 8 +-- .../defog_dealership_gen4_sqlite.sql | 10 ++-- .../sqlite_udf_combine_strings_sqlite.sql | 4 +- .../sqlite_udf_covar_pop_sqlite.sql | 14 ++--- .../sqlite_udf_nested_sqlite.sql | 4 +- .../sqlite_udf_percent_epsilon_sqlite.sql | 4 +- .../sqlite_udf_relmin_sqlite.sql | 4 +- ...aph_incident_rate_by_release_year_ansi.sql | 4 +- ...h_incident_rate_by_release_year_sqlite.sql | 4 +- ...technograph_monthly_incident_rate_ansi.sql | 46 ++++++++-------- ...chnograph_monthly_incident_rate_sqlite.sql | 46 ++++++++-------- ...tive_incident_rate_goldcopperstar_ansi.sql | 10 ++-- ...ve_incident_rate_goldcopperstar_sqlite.sql | 10 ++-- tests/test_sql_refsols/tpch_q11_ansi.sql | 10 ++-- tests/test_sql_refsols/tpch_q11_sqlite.sql | 10 ++-- tests/test_sql_refsols/tpch_q15_ansi.sql | 6 +-- tests/test_sql_refsols/tpch_q15_sqlite.sql | 6 +-- tests/test_sql_refsols/tpch_q7_ansi.sql | 2 +- tests/test_sql_refsols/tpch_q7_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q9_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q9_sqlite.sql | 4 +- 102 files changed, 779 insertions(+), 817 deletions(-) diff --git 
a/pydough/conversion/agg_split.py b/pydough/conversion/agg_split.py index 9879c37c5..66745f923 100644 --- a/pydough/conversion/agg_split.py +++ b/pydough/conversion/agg_split.py @@ -226,7 +226,7 @@ def transpose_aggregate_join( # Derive which columns are used as aggregate keys by # the input. - input_keys: dict[str, ColumnReference] = {} + input_keys: dict[str, RelationalExpression] = {} for ref in side_keys: input_keys[ref.name] = ref.with_input(None) for agg_key in node.keys.values(): diff --git a/pydough/conversion/column_bubbler.py b/pydough/conversion/column_bubbler.py index d5d0bc131..6f5b68017 100644 --- a/pydough/conversion/column_bubbler.py +++ b/pydough/conversion/column_bubbler.py @@ -188,16 +188,19 @@ def run_column_bubbling( # For aggregate, do the same as projection but run separately for # keys and aggregations. new_input, input_mapping = run_column_bubbling(node.input, corr_remap) - new_keys: dict[str, ColumnReference] = {} + new_keys: dict[str, RelationalExpression] = {} new_aggs: dict[str, CallExpression] = {} for name, key_expr in node.keys.items(): new_expr = apply_substitution(key_expr, input_mapping, corr_remap) - assert isinstance(new_expr, ColumnReference) new_ref = ColumnReference(name, key_expr.data_type) if new_expr in aliases: remapping[new_ref] = aliases[new_expr] else: - if new_expr.name != name and new_expr.name not in used_names: + if ( + isinstance(new_expr, ColumnReference) + and new_expr.name != name + and new_expr.name not in used_names + ): used_names.add(new_expr.name) alt_ref = ColumnReference(new_expr.name, new_expr.data_type) remapping[new_ref] = alt_ref diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index b126550ad..4e353c914 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -8,6 +8,8 @@ from pydough.relational import ( + Aggregate, + CallExpression, ColumnReference, ExpressionSortInfo, Filter, @@ -229,6 +231,55 @@ def 
pull_project_into_limit(node: Limit) -> None: ] +def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: + """ + TODO + """ + if not isinstance(node.input, Project): + return node + + project: Project = node.input + + finder: ColumnReferenceFinder = ColumnReferenceFinder() + finder.reset() + for key_expr in node.aggregations.values(): + key_expr.accept(finder) + agg_cols: set[ColumnReference] = finder.get_column_references() + agg_names: set[str] = {col.name for col in agg_cols} + finder.reset() + for agg_expr in node.keys.values(): + agg_expr.accept(finder) + key_cols: set[ColumnReference] = finder.get_column_references() + key_names: set[str] = {col.name for col in key_cols} + + transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( + widen_columns(project) + ) + substitutions: dict[RelationalExpression, RelationalExpression] = {} + new_expr: RelationalExpression + for name, expr in project.columns.items(): + new_expr = apply_substitution(expr, transfer_substitutions, {}) + if (not contains_window(new_expr)) and ( + (name in agg_names) != (name in key_names) + ): + ref_expr: ColumnReference = ColumnReference(name, expr.data_type) + substitutions[ref_expr] = new_expr + new_keys: dict[str, RelationalExpression] = { + name: apply_substitution(expr, substitutions, {}) + for name, expr in node.keys.items() + } + new_aggs: dict[str, CallExpression] = {} + for name, expr in node.aggregations.items(): + new_expr = apply_substitution(expr, substitutions, {}) + assert isinstance(new_expr, CallExpression) + new_aggs[name] = new_expr + return Aggregate( + input=node.input, + keys=new_keys, + aggregations=new_aggs, + ) + + def pullup_projections(node: RelationalNode) -> RelationalNode: """ TODO @@ -249,5 +300,7 @@ def pullup_projections(node: RelationalNode) -> RelationalNode: case Limit(): pull_project_into_limit(node) return pull_non_columns(node) + case Aggregate(): + return pull_project_into_aggregate(node) case _: return node diff --git 
a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 66754ba96..ac67d9852 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -588,7 +588,7 @@ def apply_aggregations( ConnectionType.NO_MATCH_AGGREGATION, ) out_columns: dict[HybridExpr, ColumnReference] = {} - keys: dict[str, ColumnReference] = {} + keys: dict[str, RelationalExpression] = {} aggregations: dict[str, CallExpression] = {} used_names: set[str] = set() # First, propagate all key columns into the output, and add them to diff --git a/pydough/relational/relational_nodes/aggregate.py b/pydough/relational/relational_nodes/aggregate.py index 94cf63bb8..8fe953577 100644 --- a/pydough/relational/relational_nodes/aggregate.py +++ b/pydough/relational/relational_nodes/aggregate.py @@ -6,7 +6,6 @@ from pydough.relational.relational_expressions import ( CallExpression, - ColumnReference, RelationalExpression, ) @@ -24,7 +23,7 @@ class Aggregate(SingleRelational): def __init__( self, input: RelationalNode, - keys: dict[str, ColumnReference], + keys: dict[str, RelationalExpression], aggregations: dict[str, CallExpression], ) -> None: total_cols: dict[str, RelationalExpression] = {**keys, **aggregations} @@ -32,14 +31,14 @@ def __init__( "Keys and aggregations must have unique names" ) super().__init__(input, total_cols) - self._keys: dict[str, ColumnReference] = keys + self._keys: dict[str, RelationalExpression] = keys self._aggregations: dict[str, CallExpression] = aggregations assert all(agg.is_aggregation for agg in aggregations.values()), ( "All functions used in aggregations must be aggregation functions" ) @property - def keys(self) -> dict[str, ColumnReference]: + def keys(self) -> dict[str, RelationalExpression]: """ The keys for the aggregation operation. 
""" @@ -78,11 +77,8 @@ def node_copy( keys = {} aggregations = {} for key, val in columns.items(): - if isinstance(val, ColumnReference): - keys[key] = val - else: - assert isinstance(val, CallExpression), ( - "All columns must be references or functions" - ) + if isinstance(val, CallExpression) and val.op.is_aggregation: aggregations[key] = val + else: + keys[key] = val return Aggregate(inputs[0], keys, aggregations) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_basic.txt b/tests/test_plan_refsols/agg_orders_by_year_month_basic.txt index cee7d4ebd..e59cef716 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_basic.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_basic.txt @@ -1,4 +1,3 @@ ROOT(columns=[('year', year), ('month', month), ('total_orders', total_orders)], orderings=[]) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'total_orders': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'total_orders': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt index e4eb7a12f..91f0efac3 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt @@ -1,15 +1,13 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[]) JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': 
YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, 
columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt index 8279c92ac..3c30882d8 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt @@ -1,15 +1,13 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', n_rows), ('total_orders', DEFAULT_TO(agg_1, 0:numeric))], orderings=[]) JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t1.n_rows, 'month': t0.month, 'n_rows': t0.n_rows, 'year': t0.year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 
'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/agg_partition.txt b/tests/test_plan_refsols/agg_partition.txt index 9532d9962..9b0bc75da 100644 --- a/tests/test_plan_refsols/agg_partition.txt +++ b/tests/test_plan_refsols/agg_partition.txt @@ -1,5 +1,4 @@ ROOT(columns=[('best_year', best_year)], orderings=[]) AGGREGATE(keys={}, aggregations={'best_year': MAX(n_orders)}) - AGGREGATE(keys={'year': year}, aggregations={'n_orders': COUNT()}) - PROJECT(columns={'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'n_orders': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git 
a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt index 263763bd7..4efebacf2 100644 --- a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt +++ b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt @@ -1,8 +1,7 @@ ROOT(columns=[('order_key', o_orderkey), ('max_ratio', max_ratio)], orderings=[]) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(ratio)}) - PROJECT(columns={'l_orderkey': l_orderkey, 'ratio': l_quantity / ps_availqty}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_on_function_call.txt b/tests/test_plan_refsols/aggregate_on_function_call.txt index 6ab7d55c8..3e68d185b 100644 --- 
a/tests/test_plan_refsols/aggregate_on_function_call.txt +++ b/tests/test_plan_refsols/aggregate_on_function_call.txt @@ -1,6 +1,5 @@ ROOT(columns=[('nation_name', n_nationkey), ('avg_consumer_value', avg_consumer_value)], orderings=[]) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_consumer_value': t1.avg_consumer_value, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_consumer_value': MAX(expr_1)}) - PROJECT(columns={'c_nationkey': c_nationkey, 'expr_1': IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_consumer_value': MAX(IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal))}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_1.txt b/tests/test_plan_refsols/aggregation_analytics_1.txt index f83dbaec8..e77367f86 100644 --- a/tests/test_plan_refsols/aggregation_analytics_1.txt +++ b/tests/test_plan_refsols/aggregation_analytics_1.txt @@ -7,15 +7,14 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_ SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & 
t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 
'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 2be73d9d4..af9436cf0 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,15 +1,14 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) 
JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 
'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 
'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 0945b2982..cf4d7e4f7 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,15 +1,14 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 
'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': 
ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt index 7bc4311aa..c08e7d33f 100644 --- a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt +++ b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt @@ -4,6 +4,5 @@ ROOT(columns=[('region_name', r_name), ('avg_bal_without_debt_erasure', sum_sum_ AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_count_expr_1': SUM(count_expr_1), 'sum_sum_expr_1': SUM(sum_expr_1)}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_expr_1': COUNT(expr_1), 'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'c_nationkey': c_nationkey, 'expr_1': LARGEST(c_acctbal, 0:numeric)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + 
AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_expr_1': COUNT(LARGEST(c_acctbal, 0:numeric)), 'sum_expr_1': SUM(LARGEST(c_acctbal, 0:numeric))}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index 94e733398..e00d11d91 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -1,9 +1,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0)}) - PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(1:numeric)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': 
c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index a2ecdb1b4..e57b541ed 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -21,24 +21,23 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': 
t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - 
JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git 
a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index e5ee9d7a2..a2d4305ca 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -12,24 +12,23 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', DEFAUL SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 
'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == 
t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git 
a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index d544d0cf7..f28e37a55 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -9,24 +9,23 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, 
columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': 
n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + 
FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index b6ea23a97..4df2fe150 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -21,24 +21,23 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 
'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': 
ps_suppkey, 'ps_supplycost': ps_supplycost}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + 
FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 4d1bb2447..6d74fea0f 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -1,13 +1,12 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers), ('n_suppliers', n_suppliers)], orderings=[(r_name):asc_first]) 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0), 'n_suppliers': SUM(n_suppliers)}) - PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(1:numeric), 'n_suppliers': SUM(n_suppliers)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 953b939f0..e9ba7c35d 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -2,20 +2,18 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': 
t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(1:numeric), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': 
t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 6676f3f68..1bb9d040a 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -2,25 +2,24 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': 
t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows, 'sum_n_rows_2': sum_n_rows_2}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_2': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(1:numeric), 'sum_agg_29': SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_2': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': 
t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_e.txt b/tests/test_plan_refsols/common_prefix_e.txt index ba7632a6d..bf725b8e1 100644 --- a/tests/test_plan_refsols/common_prefix_e.txt +++ b/tests/test_plan_refsols/common_prefix_e.txt @@ -1,9 
+1,8 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1)}) - PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(1:numeric)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index dd8ca64e5..a30d8068c 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -1,13 +1,12 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_sum_n_rows)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_sum_n_rows': SUM(sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(1:numeric), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': 
COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 848a95bb5..d4b65fe8d 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -1,13 +1,12 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_suppliers), ('n_nations', sum_agg_2)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(agg_2)}) - PROJECT(columns={'agg_2': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - 
AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(1:numeric)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index cda098921..c777a3423 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -2,20 +2,18 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_T JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_0, 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 
'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': 
t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index feffdfb1a..93bd2de6c 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -7,11 +7,10 @@ ROOT(columns=[('n', n)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + 
SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 6d7af3f9c..264641ef8 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -10,11 +10,10 @@ ROOT(columns=[('n', n_rows)], orderings=[]) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': 
ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index 1217865d6..0afce8592 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -3,11 +3,10 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orde FILTER(condition=MONOTONIC(prev_month_avg_price, o_totalprice, avg_o_totalprice) | MONOTONIC(avg_o_totalprice, o_totalprice, prev_month_avg_price), columns={'month': month, 'year': year}) JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) - FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month': 
MONTH(o_orderdate), 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) + FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index e7c7cde56..6c7d2bd5a 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -1,20 +1,19 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) - PROJECT(columns={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_selected_purchases': 1:numeric}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(1:numeric), 'nation_name_0': ANYTHING(n_name)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index ac9583af3..9bca98b3e 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -2,19 +2,18 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 
'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) - PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': 
o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(1:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 3f6839aaf..f83a5a536 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -2,17 +2,16 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) - PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(1:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 9f4248442..2e633f4f9 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,20 +1,19 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(region_name), 'n_rows': COUNT()}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': LOWER(r_name)}) - FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 
'r_name': r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) + FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': 
r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 
's_acctbal': t1.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index c868fedf4..a4cd83a96 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,17 +1,16 @@ ROOT(columns=[('nation_name', nation_name), ('mean_rev', mean_rev), ('median_rev', median_rev)], orderings=[(nation_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(revenue), 'median_rev': MEDIAN(revenue), 'nation_name': ANYTHING(n_name)}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(l_extendedprice * 1:numeric - l_discount), 'median_rev': MEDIAN(l_extendedprice * 1:numeric - l_discount), 'nation_name': ANYTHING(n_name)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index 21bb4c0e6..a1e3920bd 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -5,16 +5,15 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': 
ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': year}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - PROJECT(columns={'agg_0': 1:numeric, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'sum_l_extendedprice': sum_l_extendedprice, 'year': YEAR(o_orderdate)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 
'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': SUM(1:numeric), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 97aba3b36..c85560614 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -8,7 +8,6 @@ ROOT(columns=[('name', c_name), ('largest_diff', IFF(ABS(min_diff) > max_diff, m 
JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(r)}) - PROJECT(columns={'l_orderkey': l_orderkey, 'r': l_extendedprice * 1:numeric - l_discount}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_shipmode == 'AIR':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_shipmode == 'AIR':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) diff --git a/tests/test_plan_refsols/double_partition.txt b/tests/test_plan_refsols/double_partition.txt index 28d3238b9..4a8f45c91 100644 --- a/tests/test_plan_refsols/double_partition.txt +++ b/tests/test_plan_refsols/double_partition.txt @@ -1,5 +1,4 @@ ROOT(columns=[('year', year), ('best_month', best_month)], orderings=[]) AGGREGATE(keys={'year': year}, aggregations={'best_month': MAX(n_orders)}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_orders': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) - 
SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_orders': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index a39171a35..0e76d9871 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,25 +1,23 @@ ROOT(columns=[('season_name', anything_s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(anything_s_name):asc_first]) JOIN(condition=t0.anything_s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'anything_s_name': t0.anything_s_name, 'n_rows': t1.n_rows, 'sum_is_intra_season': t1.sum_is_intra_season}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) - PROJECT(columns={'is_intra_season': DEFAULT_TO(n_rows, 0:numeric) > 0:numeric, 's_name': s_name}) - JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) - AGGREGATE(keys={'s_name': s_name, 
'search_id': search_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) - PROJECT(columns={'is_intra_season': name_9 == s_name, 's_name': s_name}) - JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.s_name, 's_name': t0.s_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 
's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) + JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) + AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) + 
SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(name_9 == s_name)}) + JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.s_name, 's_name': t0.s_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/global_acctbal_breakdown.txt b/tests/test_plan_refsols/global_acctbal_breakdown.txt index 0625bd292..849cf5736 100644 --- a/tests/test_plan_refsols/global_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/global_acctbal_breakdown.txt @@ -1,4 +1,3 @@ ROOT(columns=[('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[]) - AGGREGATE(keys={}, aggregations={'median_black_acctbal': MEDIAN(non_negative_acctbal), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(negative_acctbal), 
'n_black_acctbal': COUNT(non_negative_acctbal), 'n_red_acctbal': COUNT(negative_acctbal)}) - PROJECT(columns={'c_acctbal': c_acctbal, 'negative_acctbal': KEEP_IF(c_acctbal, c_acctbal < 0:numeric), 'non_negative_acctbal': KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'median_black_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'n_black_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'n_red_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric))}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) diff --git a/tests/test_plan_refsols/highest_priority_per_year.txt b/tests/test_plan_refsols/highest_priority_per_year.txt index 2ea818a88..68046f37a 100644 --- a/tests/test_plan_refsols/highest_priority_per_year.txt +++ b/tests/test_plan_refsols/highest_priority_per_year.txt @@ -1,6 +1,5 @@ ROOT(columns=[('order_year', order_year), ('highest_priority', o_orderpriority), ('priority_pct', priority_pct)], orderings=[(order_year):asc_first]) FILTER(condition=RANKING(args=[], partition=[order_year], order=[(priority_pct):desc_first]) == 1:numeric, columns={'o_orderpriority': o_orderpriority, 'order_year': order_year, 'priority_pct': priority_pct}) PROJECT(columns={'o_orderpriority': o_orderpriority, 'order_year': order_year, 'priority_pct': 100.0:numeric * n_orders / RELSUM(args=[n_orders], partition=[order_year], order=[])}) - AGGREGATE(keys={'o_orderpriority': o_orderpriority, 'order_year': order_year}, aggregations={'n_orders': COUNT()}) - PROJECT(columns={'o_orderpriority': o_orderpriority, 'order_year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_orderpriority': o_orderpriority, 'order_year': YEAR(o_orderdate)}, 
aggregations={'n_orders': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index d438e6e1e..333521c0c 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -3,13 +3,10 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (m JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) - PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - 
SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(DEFAULT_TO(sum_o_totalprice, 0:numeric))}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/nation_acctbal_breakdown.txt b/tests/test_plan_refsols/nation_acctbal_breakdown.txt index 4de6c527a..977b01033 100644 --- a/tests/test_plan_refsols/nation_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/nation_acctbal_breakdown.txt @@ -4,6 +4,5 @@ ROOT(columns=[('nation_name', n_name), ('n_red_acctbal', n_red_acctbal), ('n_bla SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'median_black_acctbal': MEDIAN(non_negative_acctbal), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(negative_acctbal), 'n_black_acctbal': COUNT(non_negative_acctbal), 'n_red_acctbal': COUNT(negative_acctbal)}) - PROJECT(columns={'c_acctbal': c_acctbal, 
'c_nationkey': c_nationkey, 'negative_acctbal': KEEP_IF(c_acctbal, c_acctbal < 0:numeric), 'non_negative_acctbal': KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'median_black_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'n_black_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'n_red_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric))}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/odate_and_rdate_avggap.txt b/tests/test_plan_refsols/odate_and_rdate_avggap.txt index fef1cca7d..a556f2e22 100644 --- a/tests/test_plan_refsols/odate_and_rdate_avggap.txt +++ b/tests/test_plan_refsols/odate_and_rdate_avggap.txt @@ -1,7 +1,6 @@ ROOT(columns=[('avg_gap', avg_gap)], orderings=[]) - AGGREGATE(keys={}, aggregations={'avg_gap': AVG(day_gap)}) - PROJECT(columns={'day_gap': DATEDIFF('days':string, o_orderdate, SMALLEST(l_commitdate, l_receiptdate))}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_commitdate': t0.l_commitdate, 'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={}, aggregations={'avg_gap': AVG(DATEDIFF('days':string, o_orderdate, SMALLEST(l_commitdate, l_receiptdate)))}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_commitdate': t0.l_commitdate, 'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/region_acctbal_breakdown.txt b/tests/test_plan_refsols/region_acctbal_breakdown.txt index 1dd3998d9..d58e9d39e 100644 --- a/tests/test_plan_refsols/region_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/region_acctbal_breakdown.txt @@ -1,8 +1,7 @@ ROOT(columns=[('region_name', r_name), ('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'median_black_acctbal': MEDIAN(non_negative_acctbal), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(negative_acctbal), 'n_black_acctbal': COUNT(non_negative_acctbal), 'n_red_acctbal': COUNT(negative_acctbal)}) - PROJECT(columns={'c_acctbal': c_acctbal, 'n_regionkey': n_regionkey, 'negative_acctbal': KEEP_IF(c_acctbal, c_acctbal < 0:numeric), 'non_negative_acctbal': 
KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'median_black_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'n_black_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'n_red_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric))}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/simple_var_std_with_nulls.txt b/tests/test_plan_refsols/simple_var_std_with_nulls.txt index 85f3089c8..05e709ed1 100644 --- a/tests/test_plan_refsols/simple_var_std_with_nulls.txt +++ b/tests/test_plan_refsols/simple_var_std_with_nulls.txt @@ -1,5 +1,4 @@ ROOT(columns=[('var_samp_0_nnull', var_samp_0_nnull), ('var_samp_1_nnull', var_samp_1_nnull), ('var_samp_2_nnull', var_samp_2_nnull), ('var_pop_0_nnull', var_pop_0_nnull), ('var_pop_1_nnull', var_pop_1_nnull), ('var_pop_2_nnull', var_pop_2_nnull), ('std_samp_0_nnull', std_samp_0_nnull), ('std_samp_1_nnull', std_samp_1_nnull), ('std_samp_2_nnull', std_samp_2_nnull), ('std_pop_0_nnull', std_pop_0_nnull), ('std_pop_1_nnull', std_pop_1_nnull), ('std_pop_2_nnull', std_pop_2_nnull)], orderings=[]) - AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(key_0), 
'std_pop_1_nnull': POPULATION_STD(key_1), 'std_pop_2_nnull': POPULATION_STD(key_2), 'std_samp_0_nnull': SAMPLE_STD(key_0), 'std_samp_1_nnull': SAMPLE_STD(key_1), 'std_samp_2_nnull': SAMPLE_STD(key_2), 'var_pop_0_nnull': POPULATION_VARIANCE(key_0), 'var_pop_1_nnull': POPULATION_VARIANCE(key_1), 'var_pop_2_nnull': POPULATION_VARIANCE(key_2), 'var_samp_0_nnull': SAMPLE_VARIANCE(key_0), 'var_samp_1_nnull': SAMPLE_VARIANCE(key_1), 'var_samp_2_nnull': SAMPLE_VARIANCE(key_2)}) - PROJECT(columns={'key_0': KEEP_IF(c_acctbal, c_custkey > 3:numeric), 'key_1': KEEP_IF(c_acctbal, c_custkey > 2:numeric), 'key_2': KEEP_IF(c_acctbal, c_custkey > 1:numeric)}) - FILTER(condition=ISIN(c_custkey, [1, 2, 3]:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) + AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_pop_1_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'std_pop_2_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'std_samp_0_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_samp_1_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'std_samp_2_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_pop_0_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_pop_1_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_pop_2_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_samp_0_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_samp_1_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_samp_2_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 1:numeric))}) + FILTER(condition=ISIN(c_custkey, [1, 2, 3]:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_combine_strings.txt b/tests/test_plan_refsols/sqlite_udf_combine_strings.txt index 7669d2e75..a0c333d7e 100644 --- a/tests/test_plan_refsols/sqlite_udf_combine_strings.txt +++ b/tests/test_plan_refsols/sqlite_udf_combine_strings.txt @@ -1,14 +1,11 @@ ROOT(columns=[('s1', combine_strings_r_name), ('s2', agg_1), ('s3', agg_2), ('s4', agg_3)], orderings=[]) JOIN(condition=True:bool, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.agg_3, 'combine_strings_r_name': t0.combine_strings_r_name}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.agg_2, 'combine_strings_r_name': t0.combine_strings_r_name}) - AGGREGATE(keys={}, aggregations={'agg_1': COMBINE_STRINGS(n, ', ':string), 'combine_strings_r_name': COMBINE_STRINGS(r_name)}) - PROJECT(columns={'n': KEEP_IF(r_name, r_name != 'EUROPE':string), 'r_name': r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name}) - AGGREGATE(keys={}, aggregations={'agg_2': COMBINE_STRINGS(expr_4, '':string)}) - PROJECT(columns={'expr_4': SLICE(n_name, None:unknown, 1:numeric, None:unknown)}) - SCAN(table=tpch.NATION, columns={'n_name': n_name}) - AGGREGATE(keys={}, aggregations={'agg_3': COMBINE_STRINGS(expr_5, ' <=> ':string)}) - PROJECT(columns={'expr_5': SLICE(o_orderpriority, 2:numeric, None:unknown, None:unknown)}) - AGGREGATE(keys={'o_orderpriority': o_orderpriority}, aggregations={}) - FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={}, aggregations={'agg_1': COMBINE_STRINGS(KEEP_IF(r_name, r_name != 'EUROPE':string), ', ':string), 'combine_strings_r_name': COMBINE_STRINGS(r_name)}) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) + 
AGGREGATE(keys={}, aggregations={'agg_2': COMBINE_STRINGS(SLICE(n_name, None:unknown, 1:numeric, None:unknown), '':string)}) + SCAN(table=tpch.NATION, columns={'n_name': n_name}) + AGGREGATE(keys={}, aggregations={'agg_3': COMBINE_STRINGS(SLICE(o_orderpriority, 2:numeric, None:unknown, None:unknown), ' <=> ':string)}) + AGGREGATE(keys={'o_orderpriority': o_orderpriority}, aggregations={}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt index 03ef1ee24..8dbd1e7f3 100644 --- a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt +++ b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt @@ -1,12 +1,11 @@ ROOT(columns=[('region_name', r_name), ('cvp_ab_otp', ROUND(agg_0, 3:numeric))], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, expr_1)}) - PROJECT(columns={'c_acctbal': c_acctbal, 'expr_1': o_totalprice / 1000000.0:numeric, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, o_totalprice / 1000000.0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/sqlite_udf_nested.txt b/tests/test_plan_refsols/sqlite_udf_nested.txt index 08ff9065b..5986f6668 100644 --- a/tests/test_plan_refsols/sqlite_udf_nested.txt +++ b/tests/test_plan_refsols/sqlite_udf_nested.txt @@ -1,9 +1,8 @@ ROOT(columns=[('p', ROUND(percentage_expr_1, 
2:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'percentage_expr_1': PERCENTAGE(expr_1)}) - PROJECT(columns={'expr_1': DECODE3(c_mktsegment, 'BUILDING':string, POSITIVE(c_acctbal), 'MACHINERY':string, EPSILON(c_acctbal, min_bal, 500:numeric), 'HOUSEHOLD':string, INTEGER(FORMAT_DATETIME('%j':string, min_o_orderdate)) == '366':string, False:bool)}) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': min_bal, 'min_o_orderdate': min_o_orderdate}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': RELMIN(args=[c_acctbal], partition=[], order=[]), 'min_o_orderdate': min_o_orderdate, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'min_o_orderdate': MIN(o_orderdate), 'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={}, aggregations={'percentage_expr_1': PERCENTAGE(DECODE3(c_mktsegment, 'BUILDING':string, POSITIVE(c_acctbal), 'MACHINERY':string, EPSILON(c_acctbal, min_bal, 500:numeric), 'HOUSEHOLD':string, INTEGER(FORMAT_DATETIME('%j':string, min_o_orderdate)) == '366':string, False:bool))}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': min_bal, 'min_o_orderdate': min_o_orderdate}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': RELMIN(args=[c_acctbal], partition=[], order=[]), 'min_o_orderdate': min_o_orderdate, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'min_o_orderdate': MIN(o_orderdate), 'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt index f102a46f0..30719771b 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt @@ -1,6 +1,5 @@ ROOT(columns=[('pct_e1', ROUND(percentage_expr_5, 4:numeric)), ('pct_e10', ROUND(percentage_expr_6, 4:numeric)), ('pct_e100', ROUND(percentage_expr_7, 4:numeric)), ('pct_e1000', ROUND(percentage_expr_8, 4:numeric)), ('pct_e10000', ROUND(percentage_expr_9, 4:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'percentage_expr_5': PERCENTAGE(expr_5), 'percentage_expr_6': PERCENTAGE(expr_6), 'percentage_expr_7': PERCENTAGE(expr_7), 'percentage_expr_8': PERCENTAGE(expr_8), 'percentage_expr_9': PERCENTAGE(expr_9)}) - PROJECT(columns={'expr_5': EPSILON(o_totalprice, global_avg, 1:numeric), 'expr_6': EPSILON(o_totalprice, global_avg, 10:numeric), 'expr_7': EPSILON(o_totalprice, global_avg, 100:numeric), 'expr_8': EPSILON(o_totalprice, global_avg, 1000:numeric), 'expr_9': EPSILON(o_totalprice, global_avg, 10000:numeric)}) - PROJECT(columns={'global_avg': RELAVG(args=[o_totalprice], partition=[], order=[]), 'o_totalprice': o_totalprice}) - FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={}, aggregations={'percentage_expr_5': PERCENTAGE(EPSILON(o_totalprice, global_avg, 1:numeric)), 
'percentage_expr_6': PERCENTAGE(EPSILON(o_totalprice, global_avg, 10:numeric)), 'percentage_expr_7': PERCENTAGE(EPSILON(o_totalprice, global_avg, 100:numeric)), 'percentage_expr_8': PERCENTAGE(EPSILON(o_totalprice, global_avg, 1000:numeric)), 'percentage_expr_9': PERCENTAGE(EPSILON(o_totalprice, global_avg, 10000:numeric))}) + PROJECT(columns={'global_avg': RELAVG(args=[o_totalprice], partition=[], order=[]), 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt index b9b7d165d..c0b287617 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt @@ -2,13 +2,11 @@ ROOT(columns=[('name', r_name), ('pct_cust_positive', ROUND(percentage_expr_2, 2 JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(expr_2)}) - PROJECT(columns={'expr_2': POSITIVE(c_acctbal), 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 
'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_3': PERCENTAGE(expr_3)}) - PROJECT(columns={'expr_3': POSITIVE(s_acctbal), 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_acctbal': t1.s_acctbal}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(POSITIVE(c_acctbal))}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_3': PERCENTAGE(POSITIVE(s_acctbal))}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_acctbal': t1.s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_relmin.txt b/tests/test_plan_refsols/sqlite_udf_relmin.txt index a4606bb58..82015e622 100644 --- a/tests/test_plan_refsols/sqlite_udf_relmin.txt +++ b/tests/test_plan_refsols/sqlite_udf_relmin.txt @@ -1,5 +1,4 @@ ROOT(columns=[('month', month), ('n_orders', n_rows), ('m1', RELMIN(args=[n_rows], partition=[], order=[])), ('m2', RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], cumulative=True)), ('m3', RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], frame=(-1, 1)))], orderings=[(month):asc_first]) - AGGREGATE(keys={'month': month}, 
aggregations={'n_rows': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate)}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'month': MONTH(o_orderdate)}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index 995700439..df78044d3 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,14 +1,13 @@ ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) - AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'co_name': co_name, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_name': pr_name}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 
'de_product_id': t1.de_product_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_device_id': t0.in_device_id}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) - FILTER(condition=er_name == 'Battery Failure':string, columns={'er_id': er_id}) - SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) + AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_device_id': t0.in_device_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': 
in_device_id, 'in_error_id': in_error_id}) + FILTER(condition=er_name == 'Battery Failure':string, columns={'er_id': er_id}) + SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 89fe7388c..0c76e958a 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,12 +1,11 @@ ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) 
- SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index 79a611d7c..86f829343 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -1,15 +1,13 @@ ROOT(columns=[('year', release_year), ('ir', ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric))], orderings=[(release_year):asc_first]) JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) - AGGREGATE(keys={'release_year': release_year}, aggregations={'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'n_rows': n_rows, 'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 
'pr_release': t1.pr_release}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) + AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) + AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) - AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt 
b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index 5914783da..fdf768b85 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,9 +1,8 @@ ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) - AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_brand': pr_brand}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt 
b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 8aa3a87db..26d7d259d 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,6 +1,6 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'month': MONTH(ca_dt), 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': year}) + AGGREGATE(keys={'month': MONTH(ca_dt), 'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1, 'year': year}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows, 'year': t0.year}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index 46de1e87a..19a48c5d6 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,11 +1,10 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) JOIN(condition=t0.pr_id == t1.de_product_id, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'de_product_id': de_product_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'de_product_id': t0.de_product_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'de_product_id': t0.de_product_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt 
b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 8eac1f4c8..f51c8a594 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -4,8 +4,8 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROU AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': YEAR(ca_dt)}) + AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_4': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index 4c754cbe5..dd5c32202 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,8 +1,8 @@ ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), 
('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': YEAR(ca_dt)}) + AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/tpch_q1.txt b/tests/test_plan_refsols/tpch_q1.txt index 14e38c38f..7588aed5a 100644 --- a/tests/test_plan_refsols/tpch_q1.txt +++ b/tests/test_plan_refsols/tpch_q1.txt @@ -1,5 +1,4 @@ ROOT(columns=[('L_RETURNFLAG', l_returnflag), ('L_LINESTATUS', l_linestatus), ('SUM_QTY', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('SUM_BASE_PRICE', DEFAULT_TO(sum_l_extendedprice, 0:numeric)), ('SUM_DISC_PRICE', DEFAULT_TO(sum_expr_9, 0:numeric)), ('SUM_CHARGE', 
DEFAULT_TO(sum_expr_8, 0:numeric)), ('AVG_QTY', avg_l_quantity), ('AVG_PRICE', avg_l_extendedprice), ('AVG_DISC', avg_l_discount), ('COUNT_ORDER', n_rows)], orderings=[(l_returnflag):asc_first, (l_linestatus):asc_first]) - AGGREGATE(keys={'l_linestatus': l_linestatus, 'l_returnflag': l_returnflag}, aggregations={'avg_l_discount': AVG(l_discount), 'avg_l_extendedprice': AVG(l_extendedprice), 'avg_l_quantity': AVG(l_quantity), 'n_rows': COUNT(), 'sum_expr_8': SUM(expr_8), 'sum_expr_9': SUM(expr_9), 'sum_l_extendedprice': SUM(l_extendedprice), 'sum_l_quantity': SUM(l_quantity)}) - PROJECT(columns={'expr_8': l_extendedprice * 1:numeric - l_discount * 1:numeric + l_tax, 'expr_9': l_extendedprice * 1:numeric - l_discount, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag}) - FILTER(condition=l_shipdate <= datetime.date(1998, 12, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_shipdate': l_shipdate, 'l_tax': l_tax}) + AGGREGATE(keys={'l_linestatus': l_linestatus, 'l_returnflag': l_returnflag}, aggregations={'avg_l_discount': AVG(l_discount), 'avg_l_extendedprice': AVG(l_extendedprice), 'avg_l_quantity': AVG(l_quantity), 'n_rows': COUNT(), 'sum_expr_8': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric + l_tax), 'sum_expr_9': SUM(l_extendedprice * 1:numeric - l_discount), 'sum_l_extendedprice': SUM(l_extendedprice), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=l_shipdate <= datetime.date(1998, 12, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 
'l_returnflag': l_returnflag, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_shipdate': l_shipdate, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index cc56c28d9..578e13359 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -2,11 +2,10 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', DEFAULT_ JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': 
o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 633b50afb..8b4fb4c9e 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -2,19 +2,17 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE): FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey, 'sum_metric': sum_metric}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': 
t0.sum_metric}) - AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) - PROJECT(columns={'metric': ps_supplycost * ps_availqty}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - PROJECT(columns={'expr_2': ps_supplycost * ps_availqty, 'ps_partkey': ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'sum_metric': SUM(ps_supplycost * ps_availqty)}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, 
columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(ps_supplycost * ps_availqty)}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q12.txt b/tests/test_plan_refsols/tpch_q12.txt index 7192a4e23..e12415ca9 100644 --- a/tests/test_plan_refsols/tpch_q12.txt +++ b/tests/test_plan_refsols/tpch_q12.txt @@ -1,7 +1,6 @@ ROOT(columns=[('L_SHIPMODE', l_shipmode), ('HIGH_LINE_COUNT', DEFAULT_TO(sum_is_high_priority, 0:numeric)), ('LOW_LINE_COUNT', DEFAULT_TO(sum_expr_2, 0:numeric))], orderings=[(l_shipmode):asc_first]) - AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(expr_2), 'sum_is_high_priority': SUM(is_high_priority)}) - PROJECT(columns={'expr_2': NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown])), 'is_high_priority': 
ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]), 'l_shipmode': l_shipmode}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) - FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]))), 'sum_is_high_priority': SUM(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]))}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) + FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/tpch_q13.txt b/tests/test_plan_refsols/tpch_q13.txt index ee9fe24e8..fd7b629fc 100644 --- a/tests/test_plan_refsols/tpch_q13.txt +++ b/tests/test_plan_refsols/tpch_q13.txt @@ -1,8 +1,7 @@ ROOT(columns=[('C_COUNT', num_non_special_orders), ('CUSTDIST', CUSTDIST)], 
orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last], limit=10:numeric) - AGGREGATE(keys={'num_non_special_orders': num_non_special_orders}, aggregations={'CUSTDIST': COUNT()}) - PROJECT(columns={'num_non_special_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_comment': o_comment, 'o_custkey': o_custkey}) + AGGREGATE(keys={'num_non_special_orders': DEFAULT_TO(n_rows, 0:numeric)}, aggregations={'CUSTDIST': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_comment': o_comment, 'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q14.txt b/tests/test_plan_refsols/tpch_q14.txt index 672f4aaa9..ea3385773 100644 --- a/tests/test_plan_refsols/tpch_q14.txt +++ b/tests/test_plan_refsols/tpch_q14.txt @@ -1,7 +1,6 @@ ROOT(columns=[('PROMO_REVENUE', 100.0:numeric * DEFAULT_TO(sum_promo_value, 0:numeric) / DEFAULT_TO(sum_value, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_promo_value': SUM(promo_value), 'sum_value': SUM(value)}) - PROJECT(columns={'promo_value': IFF(STARTSWITH(p_type, 'PROMO':string), l_extendedprice * 1:numeric - l_discount, 0:numeric), 'value': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'p_type': t1.p_type}) - FILTER(condition=MONTH(l_shipdate) == 9:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + AGGREGATE(keys={}, aggregations={'sum_promo_value': SUM(IFF(STARTSWITH(p_type, 'PROMO':string), l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_value': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'p_type': t1.p_type}) + FILTER(condition=MONTH(l_shipdate) == 9:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index ffb308cdf..b42afa376 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -1,16 +1,13 @@ ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', DEFAULT_TO(sum_expr_3, 0:numeric))], orderings=[(s_suppkey):asc_first]) JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 
's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'max_revenue': MAX(total_revenue)}) - PROJECT(columns={'total_revenue': DEFAULT_TO(sum_expr_2, 0:numeric)}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) - SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - PROJECT(columns={'expr_2': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={}, aggregations={'max_revenue': MAX(DEFAULT_TO(sum_expr_2, 0:numeric))}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) + SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(l_extendedprice * 1:numeric - l_discount)}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 
's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(expr_3)}) - PROJECT(columns={'expr_3': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(l_extendedprice * 1:numeric - l_discount)}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q19.txt b/tests/test_plan_refsols/tpch_q19.txt index a31db3ead..6a4b3eccd 100644 --- a/tests/test_plan_refsols/tpch_q19.txt +++ b/tests/test_plan_refsols/tpch_q19.txt @@ -1,8 +1,7 @@ ROOT(columns=[('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount}) - FILTER(condition=MONOTONIC(1:numeric, p_size, 5:numeric) & MONOTONIC(1:numeric, l_quantity, 11:numeric) & ISIN(p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & p_brand == 'Brand#12':string | MONOTONIC(1:numeric, p_size, 10:numeric) & MONOTONIC(10:numeric, l_quantity, 20:numeric) & ISIN(p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & p_brand == 'Brand#23':string | MONOTONIC(1:numeric, p_size, 15:numeric) & MONOTONIC(20:numeric, l_quantity, 
30:numeric) & ISIN(p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & p_brand == 'Brand#34':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'p_brand': t1.p_brand, 'p_container': t1.p_container, 'p_size': t1.p_size}) - FILTER(condition=l_shipinstruct == 'DELIVER IN PERSON':string & ISIN(l_shipmode, ['AIR', 'AIR REG']:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipinstruct': l_shipinstruct, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size}) + AGGREGATE(keys={}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) + FILTER(condition=MONOTONIC(1:numeric, p_size, 5:numeric) & MONOTONIC(1:numeric, l_quantity, 11:numeric) & ISIN(p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & p_brand == 'Brand#12':string | MONOTONIC(1:numeric, p_size, 10:numeric) & MONOTONIC(10:numeric, l_quantity, 20:numeric) & ISIN(p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & p_brand == 'Brand#23':string | MONOTONIC(1:numeric, p_size, 15:numeric) & MONOTONIC(20:numeric, l_quantity, 30:numeric) & ISIN(p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & p_brand == 'Brand#34':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 
'l_quantity': t0.l_quantity, 'p_brand': t1.p_brand, 'p_container': t1.p_container, 'p_size': t1.p_size}) + FILTER(condition=l_shipinstruct == 'DELIVER IN PERSON':string & ISIN(l_shipmode, ['AIR', 'AIR REG']:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipinstruct': l_shipinstruct, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/tpch_q3.txt b/tests/test_plan_refsols/tpch_q3.txt index f6c53f4e2..fa8154ac2 100644 --- a/tests/test_plan_refsols/tpch_q3.txt +++ b/tests/test_plan_refsols/tpch_q3.txt @@ -1,11 +1,10 @@ ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('O_ORDERDATE', o_orderdate), ('O_SHIPPRIORITY', o_shippriority)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first], limit=10:numeric) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) - FILTER(condition=o_orderdate < 
datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) - FILTER(condition=l_shipdate > datetime.date(1995, 3, 15):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) + FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + FILTER(condition=l_shipdate > 
datetime.date(1995, 3, 15):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index e8f41b9b7..44c860096 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,18 +1,17 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(DEFAULT_TO(sum_value, 0:numeric)):desc_last]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q6.txt b/tests/test_plan_refsols/tpch_q6.txt index 8181a5a2f..57562829d 100644 --- a/tests/test_plan_refsols/tpch_q6.txt +++ b/tests/test_plan_refsols/tpch_q6.txt @@ -1,5 +1,4 @@ ROOT(columns=[('REVENUE', DEFAULT_TO(sum_amt, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_amt': SUM(amt)}) - PROJECT(columns={'amt': l_extendedprice * l_discount}) - FILTER(condition=l_discount <= 0.07:numeric & l_quantity < 24:numeric & l_shipdate < datetime.date(1995, 1, 1):datetime & l_discount >= 0.05:numeric & l_shipdate >= datetime.date(1994, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': 
l_extendedprice}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + AGGREGATE(keys={}, aggregations={'sum_amt': SUM(l_extendedprice * l_discount)}) + FILTER(condition=l_discount <= 0.07:numeric & l_quantity < 24:numeric & l_shipdate < datetime.date(1995, 1, 1):datetime & l_discount >= 0.05:numeric & l_shipdate >= datetime.date(1994, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q7.txt b/tests/test_plan_refsols/tpch_q7.txt index e721ef7c6..8be0f5377 100644 --- a/tests/test_plan_refsols/tpch_q7.txt +++ b/tests/test_plan_refsols/tpch_q7.txt @@ -1,6 +1,6 @@ ROOT(columns=[('SUPP_NATION', n_name), ('CUST_NATION', cust_nation), ('L_YEAR', l_year), ('REVENUE', DEFAULT_TO(sum_volume, 0:numeric))], orderings=[(n_name):asc_first, (cust_nation):asc_first, (l_year):asc_first]) - AGGREGATE(keys={'cust_nation': cust_nation, 'l_year': l_year, 'n_name': n_name}, aggregations={'sum_volume': SUM(volume)}) - PROJECT(columns={'cust_nation': name_8, 'l_year': YEAR(l_shipdate), 'n_name': n_name, 'volume': l_extendedprice * 1:numeric - l_discount}) + AGGREGATE(keys={'cust_nation': cust_nation, 'l_year': YEAR(l_shipdate), 'n_name': n_name}, aggregations={'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) + PROJECT(columns={'cust_nation': name_8, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name}) FILTER(condition=n_name == 'FRANCE':string & name_8 == 'GERMANY':string | n_name == 'GERMANY':string & name_8 == 'FRANCE':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name, 'name_8': name_8}) JOIN(condition=t0.l_orderkey == 
t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t0.n_name, 'name_8': t1.n_name}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) diff --git a/tests/test_plan_refsols/tpch_q8.txt b/tests/test_plan_refsols/tpch_q8.txt index 4f13bff60..e19874147 100644 --- a/tests/test_plan_refsols/tpch_q8.txt +++ b/tests/test_plan_refsols/tpch_q8.txt @@ -1,21 +1,20 @@ ROOT(columns=[('O_YEAR', O_YEAR), ('MKT_SHARE', DEFAULT_TO(sum_brazil_volume, 0:numeric) / DEFAULT_TO(sum_volume, 0:numeric))], orderings=[]) - AGGREGATE(keys={'O_YEAR': O_YEAR}, aggregations={'sum_brazil_volume': SUM(brazil_volume), 'sum_volume': SUM(volume)}) - PROJECT(columns={'O_YEAR': YEAR(o_orderdate), 'brazil_volume': IFF(n_name == 'BRAZIL':string, l_extendedprice * 1:numeric - l_discount, 0:numeric), 'volume': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': t0.o_orderdate}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - 
FILTER(condition=p_type == 'ECONOMY ANODIZED STEEL':string, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) - FILTER(condition=ISIN(YEAR(o_orderdate), [1995, 1996]:array[unknown]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'O_YEAR': YEAR(o_orderdate)}, aggregations={'sum_brazil_volume': SUM(IFF(n_name == 'BRAZIL':string, l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': 
t0.o_orderdate}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + FILTER(condition=p_type == 'ECONOMY ANODIZED STEEL':string, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) + FILTER(condition=ISIN(YEAR(o_orderdate), [1995, 1996]:array[unknown]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': 
t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q9.txt b/tests/test_plan_refsols/tpch_q9.txt index 634b9ee27..b82a25527 100644 --- a/tests/test_plan_refsols/tpch_q9.txt +++ b/tests/test_plan_refsols/tpch_q9.txt @@ -1,15 +1,14 @@ ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(n_name):asc_first, (o_year):desc_last], limit=10:numeric) - AGGREGATE(keys={'n_name': n_name, 'o_year': o_year}, aggregations={'sum_value': SUM(value)}) - PROJECT(columns={'n_name': n_name, 'o_year': YEAR(o_orderdate), 'value': l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': 
t0.l_quantity, 'l_suppkey': t0.l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + AGGREGATE(keys={'n_name': n_name, 'o_year': YEAR(o_orderdate)}, aggregations={'sum_value': SUM(l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': 
t0.l_suppkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/triple_partition.txt b/tests/test_plan_refsols/triple_partition.txt index 7193ed6f9..a5ed33abe 100644 --- a/tests/test_plan_refsols/triple_partition.txt +++ b/tests/test_plan_refsols/triple_partition.txt @@ -1,25 +1,24 @@ ROOT(columns=[('region', supp_region), ('avgpct', avg_percentage)], orderings=[(supp_region):asc_first]) - AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(percentage)}) - PROJECT(columns={'percentage': 100.0:numeric * max_n_instances / DEFAULT_TO(sum_n_instances, 0:numeric), 'supp_region': supp_region}) - AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) - AGGREGATE(keys={'p_type': p_type, 
'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) - FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(100.0:numeric * max_n_instances / DEFAULT_TO(sum_n_instances, 0:numeric))}) + AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) + AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) + 
FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/year_month_nation_orders.txt b/tests/test_plan_refsols/year_month_nation_orders.txt index 2628b6b63..b91e4cd3c 100644 --- a/tests/test_plan_refsols/year_month_nation_orders.txt +++ b/tests/test_plan_refsols/year_month_nation_orders.txt @@ -1,12 +1,11 @@ 
ROOT(columns=[('nation_name', n_name), ('order_year', order_year), ('order_month', order_month), ('n_orders', n_orders)], orderings=[(n_orders):desc_last], limit=5:numeric) - AGGREGATE(keys={'n_name': n_name, 'order_month': order_month, 'order_year': order_year}, aggregations={'n_orders': COUNT()}) - PROJECT(columns={'n_name': n_name, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) - FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'n_name': n_name, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}, aggregations={'n_orders': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) + FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/yoy_change_in_num_orders.txt b/tests/test_plan_refsols/yoy_change_in_num_orders.txt index 8a2b5f26c..e4d7df518 100644 --- a/tests/test_plan_refsols/yoy_change_in_num_orders.txt +++ b/tests/test_plan_refsols/yoy_change_in_num_orders.txt @@ -1,4 +1,3 @@ ROOT(columns=[('year', year), ('current_year_orders', n_rows), ('pct_change', 100.0:numeric * n_rows - PREV(args=[n_rows], partition=[], order=[(year):asc_last]) / PREV(args=[n_rows], partition=[], order=[(year):asc_last]))], orderings=[(year):asc_first]) - AGGREGATE(keys={'year': year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql index ab77ecbff..48ac2f401 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql @@ -3,7 +3,6 @@ WITH _s0 AS ( COUNT(sbdpclose) AS count_sbdpclose, MAX(sbdphigh) AS max_high, MIN(sbdplow) AS min_low, - SUM(sbdpclose) AS 
sum_sbdpclose, CONCAT_WS( '-', EXTRACT(YEAR FROM CAST(sbdpdate AS DATETIME)), @@ -15,6 +14,7 @@ WITH _s0 AS ( )) END ) AS month, + SUM(sbdpclose) AS sum_sbdpclose, sbdptickerid FROM main.sbdailyprice GROUP BY diff --git a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql index e74219954..8a07d126c 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql @@ -3,7 +3,6 @@ WITH _s0 AS ( COUNT(sbdpclose) AS count_sbdpclose, MAX(sbdphigh) AS max_high, MIN(sbdplow) AS min_low, - SUM(sbdpclose) AS sum_sbdpclose, CONCAT_WS( '-', CAST(STRFTIME('%Y', sbdpdate) AS INTEGER), @@ -15,6 +14,7 @@ WITH _s0 AS ( )) END ) AS month, + SUM(sbdpclose) AS sum_sbdpclose, sbdptickerid FROM main.sbdailyprice GROUP BY diff --git a/tests/test_sql_refsols/defog_broker_adv7_ansi.sql b/tests/test_sql_refsols/defog_broker_adv7_ansi.sql index 7c09391bf..32d6c5ab1 100644 --- a/tests/test_sql_refsols/defog_broker_adv7_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv7_ansi.sql @@ -1,6 +1,5 @@ WITH _s2 AS ( SELECT - COUNT(*) AS n_rows, CONCAT_WS( '-', EXTRACT(YEAR FROM CAST(sbcustjoindate AS DATETIME)), @@ -11,7 +10,8 @@ WITH _s2 AS ( 2 * -1 )) END - ) AS month + ) AS month, + COUNT(*) AS n_rows FROM main.sbcustomer WHERE sbcustjoindate < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()) diff --git a/tests/test_sql_refsols/defog_broker_adv7_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv7_sqlite.sql index cf9b01eb3..ad71b6d6a 100644 --- a/tests/test_sql_refsols/defog_broker_adv7_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv7_sqlite.sql @@ -1,6 +1,5 @@ WITH _s2 AS ( SELECT - COUNT(*) AS n_rows, CONCAT_WS( '-', CAST(STRFTIME('%Y', sbcustjoindate) AS INTEGER), @@ -11,7 +10,8 @@ WITH _s2 AS ( 2 * -1 )) END - ) AS month + ) AS month, + COUNT(*) AS n_rows FROM main.sbcustomer WHERE sbcustjoindate < DATE('now', 'start of month') diff --git 
a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql index 2873935fa..cafd78875 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql @@ -1,14 +1,14 @@ WITH _s0 AS ( SELECT + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, SUM(sale_price) AS sum_sale_price, - customer_id, - DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter + customer_id FROM main.sales WHERE EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 GROUP BY - customer_id, - DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)), + customer_id ), _t2 AS ( SELECT SUM(_s0.sum_sale_price) AS sum_sum_sale_price, diff --git a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql index a5e9a790c..d9d271f6e 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql @@ -1,7 +1,5 @@ WITH _s0 AS ( SELECT - SUM(sale_price) AS sum_sale_price, - customer_id, DATE( sale_date, 'start of month', @@ -10,12 +8,13 @@ WITH _s0 AS ( CAST(STRFTIME('%m', DATETIME(sale_date)) AS INTEGER) - 1 ) % 3 ) AS TEXT) || ' months' - ) AS quarter + ) AS quarter, + SUM(sale_price) AS sum_sale_price, + customer_id FROM main.sales WHERE CAST(STRFTIME('%Y', sale_date) AS INTEGER) = 2023 GROUP BY - customer_id, DATE( sale_date, 'start of month', @@ -24,7 +23,8 @@ WITH _s0 AS ( CAST(STRFTIME('%m', DATETIME(sale_date)) AS INTEGER) - 1 ) % 3 ) AS TEXT) || ' months' - ) + ), + customer_id ), _t2 AS ( SELECT SUM(_s0.sum_sale_price) AS sum_sum_sale_price, diff --git a/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql index a91e49158..60956d7c9 100644 --- a/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql +++ 
b/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql @@ -7,7 +7,7 @@ WITH _s0 AS ( SELECT GROUP_CONCAT(SUBSTRING(n_name, 1, 1), '') AS agg_2 FROM tpch.nation -), _t5 AS ( +), _t2 AS ( SELECT DISTINCT o_orderpriority FROM tpch.orders @@ -16,7 +16,7 @@ WITH _s0 AS ( ), _s3 AS ( SELECT GROUP_CONCAT(SUBSTRING(o_orderpriority, 3), ' <=> ') AS agg_3 - FROM _t5 + FROM _t2 ) SELECT _s0.combine_strings_r_name AS s1, diff --git a/tests/test_sql_refsols/sqlite_udf_covar_pop_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_covar_pop_sqlite.sql index bc9653a8a..769eda523 100644 --- a/tests/test_sql_refsols/sqlite_udf_covar_pop_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_covar_pop_sqlite.sql @@ -1,24 +1,16 @@ WITH _s5 AS ( SELECT CAST(( - SUM(customer.c_acctbal * ( - CAST(orders.o_totalprice AS REAL) / 1000000.0 - )) - CAST(SUM(customer.c_acctbal) * SUM(( - CAST(orders.o_totalprice AS REAL) / 1000000.0 - )) AS REAL) / SUM( + SUM(CAST(customer.c_acctbal * orders.o_totalprice AS REAL) / 1000000.0) - CAST(SUM(customer.c_acctbal) * SUM(CAST(orders.o_totalprice AS REAL) / 1000000.0) AS REAL) / SUM( CASE - WHEN NOT ( - CAST(orders.o_totalprice AS REAL) / 1000000.0 - ) IS NULL + WHEN NOT CAST(orders.o_totalprice AS REAL) / 1000000.0 IS NULL AND NOT customer.c_acctbal IS NULL THEN 1 END ) ) AS REAL) / SUM( CASE - WHEN NOT ( - CAST(orders.o_totalprice AS REAL) / 1000000.0 - ) IS NULL + WHEN NOT CAST(orders.o_totalprice AS REAL) / 1000000.0 IS NULL AND NOT customer.c_acctbal IS NULL THEN 1 END diff --git a/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql index 400533afd..2bc509c65 100644 --- a/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql @@ -6,7 +6,7 @@ WITH _s1 AS ( FROM tpch.orders GROUP BY o_custkey -), _t3 AS ( +), _t2 AS ( SELECT MIN(customer.c_acctbal) OVER () AS min_bal, customer.c_acctbal, @@ -37,6 +37,6 @@ SELECT ) AS REAL) / COUNT(*), 2 ) AS p 
-FROM _t3 +FROM _t2 WHERE n_rows > 0 diff --git a/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql index 1ae607753..6375ec3a9 100644 --- a/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t2 AS ( +WITH _t1 AS ( SELECT AVG(o_totalprice) OVER () AS global_avg, o_totalprice @@ -37,4 +37,4 @@ SELECT ) AS REAL) / COUNT(*), 4 ) AS pct_e10000 -FROM _t2 +FROM _t1 diff --git a/tests/test_sql_refsols/sqlite_udf_relmin_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_relmin_sqlite.sql index 7811d9d12..d32c59dde 100644 --- a/tests/test_sql_refsols/sqlite_udf_relmin_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_relmin_sqlite.sql @@ -1,7 +1,7 @@ WITH _t0 AS ( SELECT - COUNT(*) AS n_rows, - CAST(STRFTIME('%m', o_orderdate) AS INTEGER) AS month + CAST(STRFTIME('%m', o_orderdate) AS INTEGER) AS month, + COUNT(*) AS n_rows FROM tpch.orders WHERE CAST(STRFTIME('%Y', o_orderdate) AS INTEGER) = 1994 diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql index 7a8882408..c00e952eb 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql @@ -12,8 +12,8 @@ WITH _s0 AS ( FROM main.products ), _s6 AS ( SELECT - SUM(_s0.n_rows) AS sum_n_rows, - EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) AS release_year + EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) AS release_year, + SUM(_s0.n_rows) AS sum_n_rows FROM _s0 AS _s0 JOIN _s1 AS _s1 ON _s0.de_product_id = _s1.pr_id diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql index 5ef83cf6a..df94defe5 100644 --- 
a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql @@ -12,8 +12,8 @@ WITH _s0 AS ( FROM main.products ), _s6 AS ( SELECT - SUM(_s0.n_rows) AS sum_n_rows, - CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) AS release_year + CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) AS release_year, + SUM(_s0.n_rows) AS sum_n_rows FROM _s0 AS _s0 JOIN _s1 AS _s1 ON _s0.de_product_id = _s1.pr_id diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index a80b170ef..1fd157a20 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -1,10 +1,10 @@ -WITH _t4 AS ( +WITH _t3 AS ( SELECT ca_dt FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 2021) -), _t7 AS ( +), _t6 AS ( SELECT co_id, co_name @@ -14,38 +14,38 @@ WITH _t4 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t6.ca_dt - FROM _t4 AS _t6 + _t5.ca_dt + FROM _t3 AS _t5 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATE_ADD(CAST(_t6.ca_dt AS TIMESTAMP), -6, 'MONTH') + ON calendar.ca_dt >= DATE_ADD(CAST(_t5.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t7 AS _t7 - ON _t7.co_id = devices.de_production_country_id + JOIN _t6 AS _t6 + ON _t6.co_id = devices.de_production_country_id GROUP BY - _t6.ca_dt + _t5.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t9.ca_dt - FROM _t4 AS _t9 + _t8.ca_dt + FROM _t3 AS _t8 JOIN main.incidents AS incidents - ON _t9.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + ON _t8.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t10 - ON 
_t10.co_id = devices.de_production_country_id + JOIN _t6 AS _t9 + ON _t9.co_id = devices.de_production_country_id GROUP BY - _t9.ca_dt + _t8.ca_dt ) SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))), ( + WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))), ( 2 * -1 )) END @@ -53,13 +53,13 @@ SELECT ROUND(( 1000000.0 * COALESCE(SUM(_s15.n_rows), 0) ) / COALESCE(SUM(_s7.n_rows), 0), 2) AS ir -FROM _t4 AS _t4 +FROM _t3 AS _t3 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t4.ca_dt + ON _s7.ca_dt = _t3.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t4.ca_dt + ON _s15.ca_dt = _t3.ca_dt GROUP BY - EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), - EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)) + EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index de7f0b427..b30eff167 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -1,10 +1,10 @@ -WITH _t4 AS ( +WITH _t3 AS ( SELECT ca_dt FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t7 AS ( +), _t6 AS ( SELECT co_id, co_name @@ -14,38 +14,38 @@ WITH _t4 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t6.ca_dt - FROM _t4 AS _t6 + _t5.ca_dt + FROM _t3 AS _t5 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATETIME(_t6.ca_dt, '-6 
month') + ON calendar.ca_dt >= DATETIME(_t5.ca_dt, '-6 month') JOIN main.devices AS devices ON calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t7 AS _t7 - ON _t7.co_id = devices.de_production_country_id + JOIN _t6 AS _t6 + ON _t6.co_id = devices.de_production_country_id GROUP BY - _t6.ca_dt + _t5.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t9.ca_dt - FROM _t4 AS _t9 + _t8.ca_dt + FROM _t3 AS _t8 JOIN main.incidents AS incidents - ON _t9.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') + ON _t8.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t10 - ON _t10.co_id = devices.de_production_country_id + JOIN _t6 AS _t9 + ON _t9.co_id = devices.de_production_country_id GROUP BY - _t9.ca_dt + _t8.ca_dt ) SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), ( 2 * -1 )) END @@ -56,13 +56,13 @@ SELECT ) AS REAL) / COALESCE(SUM(_s7.n_rows), 0), 2 ) AS ir -FROM _t4 AS _t4 +FROM _t3 AS _t3 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t4.ca_dt + ON _s7.ca_dt = _t3.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t4.ca_dt + ON _s15.ca_dt = _t3.ca_dt GROUP BY - CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), - CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER) + CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql 
b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql index d56b6a7e2..1068846d6 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t7 AS ( +), _t6 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t7 - ON _t7.pr_id = devices.de_product_id + JOIN _t6 AS _t6 + ON _t6.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t7 AS _t9 - ON _t9.pr_id = devices.de_product_id + JOIN _t6 AS _t8 + ON _t8.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql index 40f2461ce..7c9c0c3f5 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t7 AS ( +), _t6 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t7 - ON _t7.pr_id = devices.de_product_id + JOIN _t6 AS _t6 + ON _t6.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON 
_s8.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t7 AS _t9 - ON _t9.pr_id = devices.de_product_id + JOIN _t6 AS _t8 + ON _t8.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index f76f36d96..212223900 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -3,7 +3,7 @@ WITH _s0 AS ( s_nationkey, s_suppkey FROM tpch.supplier -), _t4 AS ( +), _t3 AS ( SELECT n_name, n_nationkey @@ -16,8 +16,8 @@ WITH _s0 AS ( FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t4 - ON _s0.s_nationkey = _t4.n_nationkey + JOIN _t3 AS _t3 + ON _s0.s_nationkey = _t3.n_nationkey ), _s9 AS ( SELECT SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr_2, @@ -25,8 +25,8 @@ WITH _s0 AS ( FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t7 - ON _s4.s_nationkey = _t7.n_nationkey + JOIN _t3 AS _t5 + ON _s4.s_nationkey = _t5.n_nationkey GROUP BY partsupp.ps_partkey ) diff --git a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index f76f36d96..212223900 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -3,7 +3,7 @@ WITH _s0 AS ( s_nationkey, s_suppkey FROM tpch.supplier -), _t4 AS ( +), _t3 AS ( SELECT n_name, n_nationkey @@ -16,8 +16,8 @@ WITH _s0 AS ( FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t4 - ON _s0.s_nationkey = _t4.n_nationkey + JOIN _t3 AS _t3 + ON _s0.s_nationkey = _t3.n_nationkey ), _s9 AS ( SELECT SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr_2, @@ -25,8 +25,8 @@ WITH _s0 AS ( FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t7 - ON _s4.s_nationkey = _t7.n_nationkey + JOIN _t3 AS 
_t5 + ON _s4.s_nationkey = _t5.n_nationkey GROUP BY partsupp.ps_partkey ) diff --git a/tests/test_sql_refsols/tpch_q15_ansi.sql b/tests/test_sql_refsols/tpch_q15_ansi.sql index fdfcf8468..9e3993845 100644 --- a/tests/test_sql_refsols/tpch_q15_ansi.sql +++ b/tests/test_sql_refsols/tpch_q15_ansi.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t3 AS ( SELECT l_discount, l_extendedprice, @@ -14,7 +14,7 @@ WITH _t5 AS ( 1 - l_discount )) AS sum_expr_2, l_suppkey - FROM _t5 + FROM _t3 GROUP BY l_suppkey ), _s2 AS ( @@ -29,7 +29,7 @@ WITH _t5 AS ( 1 - l_discount )) AS sum_expr_3, l_suppkey - FROM _t5 + FROM _t3 GROUP BY l_suppkey ) diff --git a/tests/test_sql_refsols/tpch_q15_sqlite.sql b/tests/test_sql_refsols/tpch_q15_sqlite.sql index 4b6f85ec0..ab90cff0e 100644 --- a/tests/test_sql_refsols/tpch_q15_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q15_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t3 AS ( SELECT l_discount, l_extendedprice, @@ -13,7 +13,7 @@ WITH _t5 AS ( 1 - l_discount )) AS sum_expr_2, l_suppkey - FROM _t5 + FROM _t3 GROUP BY l_suppkey ), _s2 AS ( @@ -28,7 +28,7 @@ WITH _t5 AS ( 1 - l_discount )) AS sum_expr_3, l_suppkey - FROM _t5 + FROM _t3 GROUP BY l_suppkey ) diff --git a/tests/test_sql_refsols/tpch_q7_ansi.sql b/tests/test_sql_refsols/tpch_q7_ansi.sql index 658b8e12a..8a66ea8ad 100644 --- a/tests/test_sql_refsols/tpch_q7_ansi.sql +++ b/tests/test_sql_refsols/tpch_q7_ansi.sql @@ -38,8 +38,8 @@ JOIN _s9 AS _s9 WHERE EXTRACT(YEAR FROM CAST(lineitem.l_shipdate AS DATETIME)) IN (1995, 1996) GROUP BY - _s9.n_name, EXTRACT(YEAR FROM CAST(lineitem.l_shipdate AS DATETIME)), + _s9.n_name, _s1.n_name ORDER BY _s1.n_name, diff --git a/tests/test_sql_refsols/tpch_q7_sqlite.sql b/tests/test_sql_refsols/tpch_q7_sqlite.sql index 04758cf86..8b8a9ea7f 100644 --- a/tests/test_sql_refsols/tpch_q7_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q7_sqlite.sql @@ -38,8 +38,8 @@ JOIN _s9 AS _s9 WHERE CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) IN (1995, 1996) GROUP BY - 
_s9.n_name, CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER), + _s9.n_name, _s1.n_name ORDER BY _s1.n_name, diff --git a/tests/test_sql_refsols/tpch_q9_ansi.sql b/tests/test_sql_refsols/tpch_q9_ansi.sql index 12fe871b0..61f920400 100644 --- a/tests/test_sql_refsols/tpch_q9_ansi.sql +++ b/tests/test_sql_refsols/tpch_q9_ansi.sql @@ -22,8 +22,8 @@ JOIN tpch.partsupp AS partsupp ON lineitem.l_partkey = partsupp.ps_partkey AND lineitem.l_suppkey = partsupp.ps_suppkey GROUP BY - nation.n_name, - EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) + EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)), + nation.n_name ORDER BY nation.n_name, o_year DESC diff --git a/tests/test_sql_refsols/tpch_q9_sqlite.sql b/tests/test_sql_refsols/tpch_q9_sqlite.sql index 37e726db9..0a5405f0d 100644 --- a/tests/test_sql_refsols/tpch_q9_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q9_sqlite.sql @@ -22,8 +22,8 @@ JOIN tpch.partsupp AS partsupp ON lineitem.l_partkey = partsupp.ps_partkey AND lineitem.l_suppkey = partsupp.ps_suppkey GROUP BY - nation.n_name, - CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) + CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER), + nation.n_name ORDER BY nation.n_name, o_year DESC From 08925815c4d320fbfdcc5102bbaf96cce5c7382c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 13:59:55 -0400 Subject: [PATCH 017/143] Added SUM(1)->COUNT() optimization --- pydough/conversion/projection_pullup.py | 26 ++++++++++++++++++- tests/test_plan_refsols/common_prefix_a.txt | 2 +- tests/test_plan_refsols/common_prefix_b.txt | 2 +- tests/test_plan_refsols/common_prefix_c.txt | 4 +-- tests/test_plan_refsols/common_prefix_d.txt | 2 +- tests/test_plan_refsols/common_prefix_e.txt | 2 +- tests/test_plan_refsols/common_prefix_f.txt | 2 +- tests/test_plan_refsols/common_prefix_g.txt | 2 +- tests/test_plan_refsols/common_prefix_h.txt | 4 +-- tests/test_plan_refsols/correl_26.txt | 2 +- tests/test_plan_refsols/correl_27.txt | 2 +- 
tests/test_plan_refsols/correl_28.txt | 2 +- .../count_cust_supplier_nation_combos.txt | 2 +- 13 files changed, 39 insertions(+), 15 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 4e353c914..b4f013a76 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -7,6 +7,7 @@ __all__ = ["pullup_projections"] +import pydough.pydough_operators as pydop from pydough.relational import ( Aggregate, CallExpression, @@ -16,6 +17,7 @@ Join, JoinType, Limit, + LiteralExpression, Project, RelationalExpression, RelationalNode, @@ -29,6 +31,7 @@ from pydough.relational.relational_expressions.column_reference_finder import ( ColumnReferenceFinder, ) +from pydough.types import NumericType from .merge_projects import merge_adjacent_projects @@ -272,7 +275,7 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: for name, expr in node.aggregations.items(): new_expr = apply_substitution(expr, substitutions, {}) assert isinstance(new_expr, CallExpression) - new_aggs[name] = new_expr + new_aggs[name] = simplify_agg(new_expr) return Aggregate( input=node.input, keys=new_keys, @@ -280,6 +283,27 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: ) +def simplify_agg(agg: CallExpression) -> CallExpression: + """ + TODO + """ + arg: RelationalExpression + if agg.op == pydop.SUM: + arg = agg.inputs[0] + if ( + isinstance(arg, LiteralExpression) + and isinstance(arg.data_type, NumericType) + and arg.value == 1 + ): + return CallExpression( + op=pydop.COUNT, + return_type=agg.data_type, + inputs=[], + ) + # In all other cases, we just return the aggregation as is. 
+ return agg + + def pullup_projections(node: RelationalNode) -> RelationalNode: """ TODO diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index e00d11d91..ae633b5bc 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(1:numeric)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 6d74fea0f..83cf63488 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers), ('n_suppliers', n_suppliers)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - 
AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(1:numeric), 'n_suppliers': SUM(n_suppliers)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT(), 'n_suppliers': SUM(n_suppliers)}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index e9ba7c35d..dcec64b51 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -2,7 +2,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(1:numeric), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 
'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -12,7 +12,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 1bb9d040a..35f5575ff 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -2,7 +2,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(1:numeric), 'sum_agg_29': SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_29': SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_2': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_e.txt b/tests/test_plan_refsols/common_prefix_e.txt index bf725b8e1..da9a367f2 100644 --- a/tests/test_plan_refsols/common_prefix_e.txt +++ 
b/tests/test_plan_refsols/common_prefix_e.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(1:numeric)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index a30d8068c..b03a4111a 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_sum_n_rows)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(1:numeric), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT(), 'sum_sum_n_rows': SUM(sum_n_rows)}) 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index d4b65fe8d..88aaeef4c 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_suppliers), ('n_nations', sum_agg_2)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(1:numeric)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': COUNT()}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt 
b/tests/test_plan_refsols/common_prefix_h.txt index c777a3423..8540424fb 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -2,7 +2,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_T JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_0, 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -12,7 +12,7 @@ ROOT(columns=[('name', r_name), ('n_nations', 
n_nations), ('n_orders', DEFAULT_T AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index 6c7d2bd5a..79e6735e6 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(1:numeric), 'nation_name_0': ANYTHING(n_name)}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': COUNT(), 'nation_name_0': ANYTHING(n_name)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, 
type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 9bca98b3e..cfbaa563a 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -2,7 +2,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(1:numeric)}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index f83a5a536..0100d1072 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -2,7 +2,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(1:numeric)}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) JOIN(condition=t0.o_orderkey == 
t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index a1e3920bd..f8c87d703 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -5,7 +5,7 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': SUM(1:numeric), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 
'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': COUNT(), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) From a69d7640a07331d1ae06a894d56b7bf548847b8c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 14 Jul 2025 00:48:05 -0400 Subject: [PATCH 018/143] Cleanup merge projects --- pydough/conversion/merge_projects.py | 69 ++++--------------- .../sqlite_udf_percent_epsilon.txt | 4 +- .../time_threshold_reached.txt | 11 ++- .../global_acctbal_breakdown_sqlite.sql | 7 +- .../nation_acctbal_breakdown_sqlite.sql | 9 ++- .../region_acctbal_breakdown_sqlite.sql | 9 ++- .../sqlite_udf_percent_epsilon_sqlite.sql | 52 ++++++-------- .../time_threshold_reached_ansi.sql | 5 +- .../time_threshold_reached_sqlite.sql | 5 +- 9 files changed, 55 insertions(+), 116 deletions(-) diff --git a/pydough/conversion/merge_projects.py b/pydough/conversion/merge_projects.py index cc5eac798..8c400cb86 100644 --- a/pydough/conversion/merge_projects.py +++ b/pydough/conversion/merge_projects.py @@ -188,46 +188,13 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: # no longer a projection. 
while isinstance(node.input, Project): child_project: Project = node.input - if isinstance(node, RelationalRoot): - # # The columns of the projection can be sucked into the root - # # above it if they are all pass-through/renamings, or if there - # # is no convolution created (only allowed if there are no - # # ordering expressions). - # if all( - # isinstance(expr, ColumnReference) - # for expr in child_project.columns.values() - # ) or ( - # len(node.orderings) == 0 - # and merging_doesnt_create_convolution( - # node.columns, child_project.columns - # ) - # ): - # # Replace all column references in the root's columns with - # # the expressions from the child projection.. - # for idx, (name, expr) in enumerate(node.ordered_columns): - # new_expr = transpose_expression(expr, child_project.columns) - # node.columns[name] = new_expr - # node.ordered_columns[idx] = (name, new_expr) - # # Do the same with the sort expressions. - # for idx, sort_info in enumerate(node.orderings): - # new_expr = transpose_expression( - # sort_info.expr, child_project.columns - # ) - # node.orderings[idx] = ExpressionSortInfo( - # new_expr, sort_info.ascending, sort_info.nulls_first - # ) - # # Delete the child projection from the tree, replacing it - # # with its input. - # node._input = child_project.input - # else: - # # Otherwise, halt the merging process since it is no longer - # # possible to merge the children of this root into it. - # break - # TODO: ADD COMMENTS - if not ( - any(contains_window(expr) for expr in child_project.columns.values()) - and any(contains_window(expr) for expr in node.columns.values()) - ): + # The columns of the projection can be sucked into the parent + # above it unless there is a window function in both. 
+ if not ( + any(contains_window(expr) for expr in child_project.columns.values()) + and any(contains_window(expr) for expr in node.columns.values()) + ): + if isinstance(node, RelationalRoot): # Replace all column references in the root's columns with # the expressions from the child projection. for idx, (name, expr) in enumerate(node.ordered_columns): @@ -245,28 +212,18 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: # Delete the child projection from the tree, replacing it # with its input. node._input = child_project.input - else: - # Otherwise, halt the merging process since it is no longer - # possible to merge the children of this root into it. - break - elif isinstance(node, Project): - # The columns of the projection can be sucked into the - # projection above it if they are all pass-through/renamings - # or if there is no convolution created. - if all( - isinstance(expr, ColumnReference) - for expr in child_project.columns.values() - ) or merging_doesnt_create_convolution(node.columns, child_project.columns): + continue + elif isinstance(node, Project): for name, expr in node.columns.items(): new_expr = transpose_expression(expr, child_project.columns) node.columns[name] = new_expr # Delete the child projection from the tree, replacing it # with its input. node._input = child_project.input - else: - # Otherwise, halt the merging process since it is no longer - # possible to merge the children of this project into it. - break + continue + # Otherwise, halt the merging process since it is no longer + # possible to merge the children of this project into it. + break # Final round: if there is a project on top of a scan, aggregate, filter, # or limit that only does column pruning/renaming, just push it into the # node. 
diff --git a/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt index 30719771b..e418e0d86 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt @@ -1,5 +1,5 @@ ROOT(columns=[('pct_e1', ROUND(percentage_expr_5, 4:numeric)), ('pct_e10', ROUND(percentage_expr_6, 4:numeric)), ('pct_e100', ROUND(percentage_expr_7, 4:numeric)), ('pct_e1000', ROUND(percentage_expr_8, 4:numeric)), ('pct_e10000', ROUND(percentage_expr_9, 4:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'percentage_expr_5': PERCENTAGE(EPSILON(o_totalprice, global_avg, 1:numeric)), 'percentage_expr_6': PERCENTAGE(EPSILON(o_totalprice, global_avg, 10:numeric)), 'percentage_expr_7': PERCENTAGE(EPSILON(o_totalprice, global_avg, 100:numeric)), 'percentage_expr_8': PERCENTAGE(EPSILON(o_totalprice, global_avg, 1000:numeric)), 'percentage_expr_9': PERCENTAGE(EPSILON(o_totalprice, global_avg, 10000:numeric))}) - PROJECT(columns={'global_avg': RELAVG(args=[o_totalprice], partition=[], order=[]), 'o_totalprice': o_totalprice}) + AGGREGATE(keys={}, aggregations={'percentage_expr_5': PERCENTAGE(expr_5), 'percentage_expr_6': PERCENTAGE(expr_6), 'percentage_expr_7': PERCENTAGE(expr_7), 'percentage_expr_8': PERCENTAGE(expr_8), 'percentage_expr_9': PERCENTAGE(expr_9)}) + PROJECT(columns={'expr_5': EPSILON(o_totalprice, RELAVG(args=[o_totalprice], partition=[], order=[]), 1:numeric), 'expr_6': EPSILON(o_totalprice, RELAVG(args=[o_totalprice], partition=[], order=[]), 10:numeric), 'expr_7': EPSILON(o_totalprice, RELAVG(args=[o_totalprice], partition=[], order=[]), 100:numeric), 'expr_8': EPSILON(o_totalprice, RELAVG(args=[o_totalprice], partition=[], order=[]), 1000:numeric), 'expr_9': EPSILON(o_totalprice, RELAVG(args=[o_totalprice], partition=[], order=[]), 10000:numeric)}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_totalprice': o_totalprice}) 
SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/time_threshold_reached.txt b/tests/test_plan_refsols/time_threshold_reached.txt index 8b94ac8f2..ba32986b7 100644 --- a/tests/test_plan_refsols/time_threshold_reached.txt +++ b/tests/test_plan_refsols/time_threshold_reached.txt @@ -1,7 +1,6 @@ ROOT(columns=[('date_time', sbTxDateTime)], orderings=[(sbTxDateTime):asc_first]) - FILTER(condition=RANKING(args=[], partition=[txn_day], order=[(pct_of_day):asc_last], allow_ties=False) == 1:numeric, columns={'sbTxDateTime': sbTxDateTime}) - FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) - PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[txn_day], order=[]), 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) - FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) - SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) + FILTER(condition=RANKING(args=[], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(pct_of_day):asc_last], allow_ties=False) == 1:numeric, columns={'sbTxDateTime': sbTxDateTime}) + FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime}) + PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[]), 'sbTxDateTime': sbTxDateTime}) + 
FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) + SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) diff --git a/tests/test_sql_refsols/global_acctbal_breakdown_sqlite.sql b/tests/test_sql_refsols/global_acctbal_breakdown_sqlite.sql index c3cabeb5a..4cfe3d75f 100644 --- a/tests/test_sql_refsols/global_acctbal_breakdown_sqlite.sql +++ b/tests/test_sql_refsols/global_acctbal_breakdown_sqlite.sql @@ -39,13 +39,12 @@ WITH _t0 AS ( THEN CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END ELSE NULL END AS expr_7, - CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END AS negative_acctbal, - CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END AS non_negative_acctbal + c_acctbal FROM tpch.customer ) SELECT - COUNT(negative_acctbal) AS n_red_acctbal, - COUNT(non_negative_acctbal) AS n_black_acctbal, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, AVG(expr_7) AS median_red_acctbal, AVG(expr_5) AS median_black_acctbal, AVG(expr_6) AS median_overall_acctbal diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql index 72c55d250..e53fbbd46 100644 --- a/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql @@ -39,17 +39,16 @@ WITH _t2 AS ( THEN CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END ELSE NULL END AS expr_7, - c_nationkey, - CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END AS negative_acctbal, - CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END AS non_negative_acctbal + c_acctbal, + c_nationkey FROM tpch.customer ), _s3 AS ( SELECT AVG(expr_5) AS median_black_acctbal, AVG(expr_6) AS median_overall_acctbal, AVG(expr_7) AS median_red_acctbal, - COUNT(non_negative_acctbal) AS 
n_black_acctbal, - COUNT(negative_acctbal) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, c_nationkey FROM _t2 GROUP BY diff --git a/tests/test_sql_refsols/region_acctbal_breakdown_sqlite.sql b/tests/test_sql_refsols/region_acctbal_breakdown_sqlite.sql index 4883afc1d..74fde6005 100644 --- a/tests/test_sql_refsols/region_acctbal_breakdown_sqlite.sql +++ b/tests/test_sql_refsols/region_acctbal_breakdown_sqlite.sql @@ -39,9 +39,8 @@ WITH _t1 AS ( THEN CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END ELSE NULL END AS expr_7, - nation.n_regionkey, - CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END AS negative_acctbal, - CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END AS non_negative_acctbal + customer.c_acctbal, + nation.n_regionkey FROM tpch.nation AS nation JOIN tpch.customer AS customer ON customer.c_nationkey = nation.n_nationkey @@ -50,8 +49,8 @@ WITH _t1 AS ( AVG(expr_5) AS median_black_acctbal, AVG(expr_6) AS median_overall_acctbal, AVG(expr_7) AS median_red_acctbal, - COUNT(non_negative_acctbal) AS n_black_acctbal, - COUNT(negative_acctbal) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, n_regionkey FROM _t1 GROUP BY diff --git a/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql index 6375ec3a9..3af0c6f54 100644 --- a/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql @@ -1,40 +1,28 @@ WITH _t1 AS ( SELECT - AVG(o_totalprice) OVER () AS global_avg, - o_totalprice + ABS(AVG(o_totalprice) OVER () - o_totalprice) <= 1 AS expr_5, + ABS(AVG(o_totalprice) OVER () - o_totalprice) <= 10 AS 
expr_6, + ABS(AVG(o_totalprice) OVER () - o_totalprice) <= 100 AS expr_7, + ABS(AVG(o_totalprice) OVER () - o_totalprice) <= 1000 AS expr_8, + ABS(AVG(o_totalprice) OVER () - o_totalprice) <= 10000 AS expr_9 FROM tpch.orders WHERE CAST(STRFTIME('%Y', o_orderdate) AS INTEGER) = 1992 ) SELECT - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN ABS(global_avg - o_totalprice) <= 1 THEN 1 END) - ) AS REAL) / COUNT(*), - 4 - ) AS pct_e1, - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN ABS(global_avg - o_totalprice) <= 10 THEN 1 END) - ) AS REAL) / COUNT(*), - 4 - ) AS pct_e10, - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN ABS(global_avg - o_totalprice) <= 100 THEN 1 END) - ) AS REAL) / COUNT(*), - 4 - ) AS pct_e100, - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN ABS(global_avg - o_totalprice) <= 1000 THEN 1 END) - ) AS REAL) / COUNT(*), - 4 - ) AS pct_e1000, - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN ABS(global_avg - o_totalprice) <= 10000 THEN 1 END) - ) AS REAL) / COUNT(*), - 4 - ) AS pct_e10000 + ROUND(CAST(( + 100.0 * SUM(CASE WHEN expr_5 THEN 1 END) + ) AS REAL) / COUNT(*), 4) AS pct_e1, + ROUND(CAST(( + 100.0 * SUM(CASE WHEN expr_6 THEN 1 END) + ) AS REAL) / COUNT(*), 4) AS pct_e10, + ROUND(CAST(( + 100.0 * SUM(CASE WHEN expr_7 THEN 1 END) + ) AS REAL) / COUNT(*), 4) AS pct_e100, + ROUND(CAST(( + 100.0 * SUM(CASE WHEN expr_8 THEN 1 END) + ) AS REAL) / COUNT(*), 4) AS pct_e1000, + ROUND(CAST(( + 100.0 * SUM(CASE WHEN expr_9 THEN 1 END) + ) AS REAL) / COUNT(*), 4) AS pct_e10000 FROM _t1 diff --git a/tests/test_sql_refsols/time_threshold_reached_ansi.sql b/tests/test_sql_refsols/time_threshold_reached_ansi.sql index 853086170..bb5d82ba1 100644 --- a/tests/test_sql_refsols/time_threshold_reached_ansi.sql +++ b/tests/test_sql_refsols/time_threshold_reached_ansi.sql @@ -3,8 +3,7 @@ WITH _t3 AS ( ( 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) / SUM(sbtxshares) OVER 
(PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP))) AS pct_of_day, - sbtxdatetime, - DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) AS txn_day + sbtxdatetime FROM main.sbtransaction WHERE EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) = 2023 @@ -15,7 +14,7 @@ WITH _t3 AS ( WHERE pct_of_day >= 50.0 QUALIFY - ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY pct_of_day NULLS LAST) = 1 + ROW_NUMBER() OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY pct_of_day NULLS LAST) = 1 ) SELECT sbtxdatetime AS date_time diff --git a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql index 5e47efb24..a21717a9c 100644 --- a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql +++ b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql @@ -3,15 +3,14 @@ WITH _t3 AS ( CAST(( 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE(sbtxdatetime, 'start of day') ORDER BY sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) AS REAL) / SUM(sbtxshares) OVER (PARTITION BY DATE(sbtxdatetime, 'start of day')) AS pct_of_day, - sbtxdatetime, - DATE(sbtxdatetime, 'start of day') AS txn_day + sbtxdatetime FROM main.sbtransaction WHERE CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) = 2023 ), _t AS ( SELECT sbtxdatetime, - ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY pct_of_day) AS _w + ROW_NUMBER() OVER (PARTITION BY DATE(sbtxdatetime, 'start of day') ORDER BY pct_of_day) AS _w FROM _t3 WHERE pct_of_day >= 50.0 From 13d9844430e627cb4082c1882848a9cb6fd529db Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 14 Jul 2025 13:56:21 -0400 Subject: [PATCH 019/143] Added some adjacent aggregaiton merging --- pydough/conversion/projection_pullup.py | 71 +++++++++++++++++++ tests/test_plan_refsols/common_prefix_b.txt | 5 +- tests/test_plan_refsols/common_prefix_c.txt | 11 ++- tests/test_plan_refsols/common_prefix_d.txt | 5 +- tests/test_plan_refsols/common_prefix_f.txt | 5 +- 
tests/test_plan_refsols/common_prefix_g.txt | 5 +- tests/test_plan_refsols/common_prefix_h.txt | 11 ++- tests/test_plan_refsols/correl_14.txt | 29 ++++---- tests/test_plan_refsols/correl_15.txt | 35 +++++---- tests/test_plan_refsols/correl_16.txt | 25 ++++--- tests/test_plan_refsols/correl_18.txt | 19 +++-- .../multi_partition_access_2.txt | 9 +-- .../multi_partition_access_5.txt | 8 +-- .../multi_partition_access_6.txt | 41 +++++------ 14 files changed, 164 insertions(+), 115 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index b4f013a76..1857bf5cd 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -27,6 +27,7 @@ add_input_name, apply_substitution, contains_window, + transpose_expression, ) from pydough.relational.relational_expressions.column_reference_finder import ( ColumnReferenceFinder, @@ -304,6 +305,75 @@ def simplify_agg(agg: CallExpression) -> CallExpression: return agg +def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: + """ + TODO + """ + if not isinstance(node.input, Aggregate): + return node + + input_agg: Aggregate = node.input + + top_keys: set[RelationalExpression] = { + transpose_expression(expr, input_agg.columns) for expr in node.keys.values() + } + bottom_keys: set[RelationalExpression] = set(input_agg.keys.values()) + + # print() + # print("Top keys:") + # for key in top_keys: + # print(f" {key.to_string(True)}") + # print("Bottom keys:") + # for key in bottom_keys: + # print(f" {key.to_string(True)}") + + if len(top_keys - bottom_keys) > 0: + return node + + bottom_only_keys: set[RelationalExpression] = bottom_keys - top_keys + + new_keys: dict[str, RelationalExpression] = { + name: transpose_expression(expr, input_agg.columns) + for name, expr in node.keys.items() + } + new_aggs: dict[str, CallExpression] = {} + input_expr: RelationalExpression + for agg_name, agg_expr in node.aggregations.items(): + match agg_expr.op: 
+ case pydop.COUNT if len(agg_expr.inputs) == 0: + if len(bottom_only_keys) == 0: + new_aggs[agg_name] = CallExpression( + op=pydop.ANYTHING, + return_type=agg_expr.data_type, + inputs=[LiteralExpression(1, agg_expr.data_type)], + ) + elif len(bottom_only_keys) == 1: + new_aggs[agg_name] = CallExpression( + op=pydop.NDISTINCT, + return_type=agg_expr.data_type, + inputs=[next(iter(bottom_only_keys))], + ) + else: + return node + case pydop.SUM: + input_expr = transpose_expression(agg_expr.inputs[0], input_agg.columns) + if isinstance(input_expr, CallExpression) and input_expr.op in ( + pydop.SUM, + pydop.COUNT, + ): + new_aggs[agg_name] = input_expr + else: + return node + case _: + return node + + return Aggregate( + input=input_agg.input, + keys=new_keys, + aggregations=new_aggs, + ) + + def pullup_projections(node: RelationalNode) -> RelationalNode: """ TODO @@ -325,6 +395,7 @@ def pullup_projections(node: RelationalNode) -> RelationalNode: pull_project_into_limit(node) return pull_non_columns(node) case Aggregate(): + node = merge_adjacent_aggregations(node) return pull_project_into_aggregate(node) case _: return node diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 83cf63488..d4bfb2b5b 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -7,6 +7,5 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_cust SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, 
aggregations={'n_suppliers': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index dcec64b51..c7f3f076b 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -11,9 +11,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 35f5575ff..a9e20fe8d 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ 
b/tests/test_plan_refsols/common_prefix_d.txt @@ -20,6 +20,5 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index b03a4111a..8de55d20c 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -7,6 +7,5 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_ SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 88aaeef4c..35ed92de0 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -7,6 +7,5 @@ 
ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_ SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 8540424fb..fc806c3fc 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -11,9 +11,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_T SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 
'sum_sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 93bd2de6c..0c0d15dde 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,16 +1,15 @@ ROOT(columns=[('n', n)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n': COUNT()}) - AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 
1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) + FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': 
SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 264641ef8..dda6f11b4 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,19 +1,18 @@ ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - 
JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + AGGREGATE(keys={}, aggregations={'n_rows': NDISTINCT(s_suppkey)}) + FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': 
t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + 
SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_16.txt b/tests/test_plan_refsols/correl_16.txt index 31ea9a869..0b8509b58 100644 --- a/tests/test_plan_refsols/correl_16.txt +++ b/tests/test_plan_refsols/correl_16.txt @@ -1,14 +1,13 @@ ROOT(columns=[('n', n)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n': COUNT()}) - AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=PERCENTILE(args=[], partition=[c_nationkey, s_suppkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last], n_buckets=10000) == tile, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) - PROJECT(columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey, 'tile': PERCENTILE(args=[], partition=[], order=[(s_acctbal):asc_last, (s_suppkey):asc_last], n_buckets=10000)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) + FILTER(condition=PERCENTILE(args=[], partition=[c_nationkey, s_suppkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last], n_buckets=10000) == tile, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + PROJECT(columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey, 'tile': PERCENTILE(args=[], partition=[], order=[(s_acctbal):asc_last, (s_suppkey):asc_last], n_buckets=10000)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 5f6ca684d..646b9855d 100644 --- 
a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,11 +1,10 @@ ROOT(columns=[('n', DEFAULT_TO(sum_n_above_avg, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_n_above_avg': SUM(n_above_avg)}) - AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_above_avg': COUNT()}) - FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) - FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) - AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={}, aggregations={'sum_n_above_avg': COUNT()}) + FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) + FILTER(condition=n_rows > 1:numeric, 
columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) + AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 0bcb1f4aa..e42e62646 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -9,8 +9,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, 
type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) @@ -21,8 +20,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) @@ -33,8 +31,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index bca9f137e..6c6ea66d7 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -6,16 +6,14 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': 
sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 5ee7193e5..69f9b27a4 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -3,38 +3,33 @@ 
ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t0.n_cust_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) FILTER(condition=DEFAULT_TO(sum_n_cust_type_trans, 0:numeric) > 1:numeric, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, 
aggregations={'sum_n_cust_type_trans': COUNT()}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, 
columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) @@ -44,15 +39,13 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': 
t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) From c4971274679453428fa1096c569dcd82eafcf2a3 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 14 Jul 2025 14:03:40 -0400 Subject: [PATCH 020/143] Added min/min, max/max, anything/anything cases --- pydough/conversion/projection_pullup.py | 9 
+++++++++ tests/test_plan_refsols/multi_partition_access_4.txt | 5 ++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 1857bf5cd..cf1658866 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -364,6 +364,15 @@ def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: new_aggs[agg_name] = input_expr else: return node + case pydop.MIN | pydop.MAX | pydop.ANYTHING: + input_expr = transpose_expression(agg_expr.inputs[0], input_agg.columns) + if ( + isinstance(input_expr, CallExpression) + and input_expr.op == agg_expr.op + ): + new_aggs[agg_name] = input_expr + else: + return node case _: return node diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index 01a79cbf9..f64e8d845 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -2,9 +2,8 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=sbTxShares < cust_max_shares & sbTxShares >= cust_ticker_max_shares, columns={'sbTxId': sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t0.cust_ticker_max_shares, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_max_shares': MAX(cust_ticker_max_shares)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_ticker_max_shares': 
MAX(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_max_shares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_ticker_max_shares': MAX(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) From f74fc5c1a2a5b721bf6c2d585411a2344718428c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 11:43:24 -0400 Subject: [PATCH 021/143] Adding more aggregation simplification and comments --- pydough/conversion/projection_pullup.py | 59 ++++++++++++------- .../aggregation_analytics_2.txt | 4 +- .../aggregation_analytics_3.txt | 4 +- tests/test_plan_refsols/correl_26.txt | 6 +- tests/test_plan_refsols/correl_27.txt | 6 +- tests/test_plan_refsols/correl_28.txt | 6 +- tests/test_plan_refsols/correl_30.txt | 4 +- .../epoch_event_gap_per_era.txt | 4 +- .../epoch_intra_season_searches.txt | 6 +- .../epoch_event_gap_per_era_ansi.sql | 2 +- .../epoch_event_gap_per_era_sqlite.sql | 2 +- .../epoch_intra_season_searches_ansi.sql | 10 ++-- .../epoch_intra_season_searches_sqlite.sql | 10 ++-- 13 files changed, 69 insertions(+), 54 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index cf1658866..3ba23982a 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -268,41 +268,64 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: ): ref_expr: ColumnReference = ColumnReference(name, expr.data_type) substitutions[ref_expr] = 
new_expr + new_columns: dict[str, RelationalExpression] = { + name: ColumnReference(name, expr.data_type) for name, expr in node.keys.items() + } new_keys: dict[str, RelationalExpression] = { name: apply_substitution(expr, substitutions, {}) for name, expr in node.keys.items() } new_aggs: dict[str, CallExpression] = {} + out_expr: RelationalExpression + new_agg_expr: CallExpression | None for name, expr in node.aggregations.items(): new_expr = apply_substitution(expr, substitutions, {}) assert isinstance(new_expr, CallExpression) - new_aggs[name] = simplify_agg(new_expr) - return Aggregate( + out_expr, new_agg_expr = simplify_agg(new_keys, new_expr, name) + new_columns[name] = out_expr + if new_agg_expr is not None: + new_aggs[name] = new_agg_expr + agg: Aggregate = Aggregate( input=node.input, keys=new_keys, aggregations=new_aggs, ) + return Project(input=agg, columns=new_columns) -def simplify_agg(agg: CallExpression) -> CallExpression: +def simplify_agg( + keys: dict[str, RelationalExpression], agg: CallExpression, name: str +) -> tuple[RelationalExpression, CallExpression | None]: """ TODO """ + reverse_keys: dict[RelationalExpression, str] = { + expr: name for name, expr in keys.items() + } + out_ref: RelationalExpression = ColumnReference(name, agg.data_type) arg: RelationalExpression - if agg.op == pydop.SUM: + if agg.op in (pydop.SUM, pydop.COUNT) and len(agg.inputs) == 1: arg = agg.inputs[0] - if ( - isinstance(arg, LiteralExpression) - and isinstance(arg.data_type, NumericType) - and arg.value == 1 + if isinstance(arg, LiteralExpression) and isinstance( + arg.data_type, NumericType ): - return CallExpression( - op=pydop.COUNT, - return_type=agg.data_type, - inputs=[], - ) + if (agg.op == pydop.SUM and arg.value == 1) or ( + agg.op == pydop.COUNT and arg.value is not None + ): + return out_ref, CallExpression( + op=pydop.COUNT, + return_type=agg.data_type, + inputs=[], + ) + + # If the aggregation is on a key, we can just return the key. 
+ if agg.op in (pydop.SUM, pydop.MIN, pydop.MAX, pydop.ANYTHING): + arg = agg.inputs[0] + if arg in reverse_keys: + return ColumnReference(reverse_keys[arg], agg.data_type), None + # In all other cases, we just return the aggregation as is. - return agg + return out_ref, agg def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: @@ -319,14 +342,6 @@ def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: } bottom_keys: set[RelationalExpression] = set(input_agg.keys.values()) - # print() - # print("Top keys:") - # for key in top_keys: - # print(f" {key.to_string(True)}") - # print("Bottom keys:") - # for key in bottom_keys: - # print(f" {key.to_string(True)}") - if len(top_keys - bottom_keys) > 0: return node diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index af9436cf0..1d8f376f1 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) 
JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index cf4d7e4f7..df3d64e66 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + 
AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index 79e6735e6..4cb7eba80 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': COUNT(), 'nation_name_0': ANYTHING(n_name)}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': 
ANYTHING(n_name)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index cfbaa563a..a4c0eb75f 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases', sum_sum_agg_0)], orderings=[(anything_anything_n_name):asc_first]) JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - 
AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_regionkey': t0.anything_n_regionkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 0100d1072..df5869922 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases', sum_sum_agg_0)], orderings=[(anything_anything_n_name):asc_first]) JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': 
t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_regionkey': t0.anything_n_regionkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': 
t0.n_regionkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 2e633f4f9..11018a2c2 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) diff --git a/tests/test_plan_refsols/epoch_event_gap_per_era.txt 
b/tests/test_plan_refsols/epoch_event_gap_per_era.txt index 8626c8529..da5390394 100644 --- a/tests/test_plan_refsols/epoch_event_gap_per_era.txt +++ b/tests/test_plan_refsols/epoch_event_gap_per_era.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('era_name', era_name), ('avg_event_gap', avg_event_gap)], orderings=[(anything_er_start_year):asc_first]) - AGGREGATE(keys={'er_name': er_name}, aggregations={'anything_er_start_year': ANYTHING(er_start_year), 'avg_event_gap': AVG(day_gap), 'era_name': ANYTHING(er_name)}) +ROOT(columns=[('era_name', er_name), ('avg_event_gap', avg_event_gap)], orderings=[(anything_er_start_year):asc_first]) + AGGREGATE(keys={'er_name': er_name}, aggregations={'anything_er_start_year': ANYTHING(er_start_year), 'avg_event_gap': AVG(day_gap)}) PROJECT(columns={'day_gap': DATEDIFF('days':string, PREV(args=[ev_dt], partition=[er_name], order=[(ev_dt):asc_last]), ev_dt), 'er_name': er_name, 'er_start_year': er_start_year}) JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, columns={'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 0e76d9871..1f8616b5a 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('season_name', anything_s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(anything_s_name):asc_first]) - JOIN(condition=t0.anything_s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_2': 
t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'anything_s_name': t0.anything_s_name, 'n_rows': t1.n_rows, 'sum_is_intra_season': t1.sum_is_intra_season}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) +ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) + JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_sql_refsols/epoch_event_gap_per_era_ansi.sql b/tests/test_sql_refsols/epoch_event_gap_per_era_ansi.sql index def99e785..584f057e8 100644 --- a/tests/test_sql_refsols/epoch_event_gap_per_era_ansi.sql +++ b/tests/test_sql_refsols/epoch_event_gap_per_era_ansi.sql @@ -13,7 +13,7 @@ WITH _t1 AS ( AND eras.er_start_year <= EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) ) SELECT - ANY_VALUE(er_name) AS era_name, + er_name AS era_name, AVG(day_gap) AS avg_event_gap 
FROM _t1 GROUP BY diff --git a/tests/test_sql_refsols/epoch_event_gap_per_era_sqlite.sql b/tests/test_sql_refsols/epoch_event_gap_per_era_sqlite.sql index 90e9de7f6..bbaebfead 100644 --- a/tests/test_sql_refsols/epoch_event_gap_per_era_sqlite.sql +++ b/tests/test_sql_refsols/epoch_event_gap_per_era_sqlite.sql @@ -16,7 +16,7 @@ WITH _t1 AS ( AND eras.er_start_year <= CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) ) SELECT - MAX(er_name) AS era_name, + er_name AS era_name, AVG(day_gap) AS avg_event_gap FROM _t1 GROUP BY diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql index 5d79982d0..e10c0c411 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql @@ -34,11 +34,11 @@ WITH _s0 AS ( searches.search_id ), _s16 AS ( SELECT - ANY_VALUE(_s0.s_name) AS anything_s_name, COUNT(*) AS n_rows, SUM(( NOT _s9.n_rows IS NULL AND _s9.n_rows > 0 - )) AS sum_is_intra_season + )) AS sum_is_intra_season, + _s0.s_name FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(searches.search_ts AS DATETIME)) @@ -68,7 +68,7 @@ WITH _s0 AS ( _s10.s_name ) SELECT - _s16.anything_s_name AS season_name, + _s16.s_name AS season_name, ROUND(( 100.0 * COALESCE(_s16.sum_is_intra_season, 0) ) / _s16.n_rows, 2) AS pct_season_searches, @@ -77,6 +77,6 @@ SELECT ) / COALESCE(_s17.n_rows, 0), 2) AS pct_event_searches FROM _s16 AS _s16 LEFT JOIN _s17 AS _s17 - ON _s16.anything_s_name = _s17.s_name + ON _s16.s_name = _s17.s_name ORDER BY - _s16.anything_s_name + _s16.s_name diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql index 39f1ed3f2..7e86ea480 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql @@ -36,11 +36,11 @@ WITH _s0 AS ( 
searches.search_id ), _s16 AS ( SELECT - MAX(_s0.s_name) AS anything_s_name, COUNT(*) AS n_rows, SUM(( NOT _s9.n_rows IS NULL AND _s9.n_rows > 0 - )) AS sum_is_intra_season + )) AS sum_is_intra_season, + _s0.s_name FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = CAST(STRFTIME('%m', searches.search_ts) AS INTEGER) @@ -72,7 +72,7 @@ WITH _s0 AS ( _s10.s_name ) SELECT - _s16.anything_s_name AS season_name, + _s16.s_name AS season_name, ROUND(CAST(( 100.0 * COALESCE(_s16.sum_is_intra_season, 0) ) AS REAL) / _s16.n_rows, 2) AS pct_season_searches, @@ -84,6 +84,6 @@ SELECT ) AS pct_event_searches FROM _s16 AS _s16 LEFT JOIN _s17 AS _s17 - ON _s16.anything_s_name = _s17.s_name + ON _s16.s_name = _s17.s_name ORDER BY - _s16.anything_s_name + _s16.s_name From d2604287b6edd408699ddd012cc4fc1a788f1554 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 13:36:28 -0400 Subject: [PATCH 022/143] Added more aggregation simplification + tests --- pydough/conversion/projection_pullup.py | 175 ++++++++++++++++-- tests/test_pipeline_defog_custom.py | 123 ++++++++++++ .../agg_simplification_1.txt | 4 + .../agg_simplification_2.txt | 5 + .../aggregation_analytics_2.txt | 25 +-- .../aggregation_analytics_3.txt | 25 +-- .../month_year_sliding_windows.txt | 6 +- ...ograph_battery_failure_rates_anomalies.txt | 4 +- ..._error_rate_sun_set_by_factory_country.txt | 17 +- .../technograph_incident_rate_per_brand.txt | 4 +- .../technograph_most_unreliable_products.txt | 4 +- .../simple_pydough_functions.py | 105 +++++++++++ ...h_battery_failure_rates_anomalies_ansi.sql | 4 +- ...battery_failure_rates_anomalies_sqlite.sql | 4 +- ...r_rate_sun_set_by_factory_country_ansi.sql | 6 +- ...rate_sun_set_by_factory_country_sqlite.sql | 6 +- ...chnograph_incident_rate_per_brand_ansi.sql | 2 +- ...nograph_incident_rate_per_brand_sqlite.sql | 2 +- ...hnograph_most_unreliable_products_ansi.sql | 6 +- ...ograph_most_unreliable_products_sqlite.sql | 6 +- 20 files changed, 456 
insertions(+), 77 deletions(-) create mode 100644 tests/test_plan_refsols/agg_simplification_1.txt create mode 100644 tests/test_plan_refsols/agg_simplification_2.txt diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 3ba23982a..a24f2e6c3 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -32,7 +32,7 @@ from pydough.relational.relational_expressions.column_reference_finder import ( ColumnReferenceFinder, ) -from pydough.types import NumericType +from pydough.types import BooleanType, NumericType from .merge_projects import merge_adjacent_projects @@ -96,7 +96,7 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: for name, expr in new_project_columns.items(): new_project_columns[name] = apply_substitution(expr, substitutions, {}) - return Project(input=node, columns=new_project_columns) + return merge_adjacent_projects(Project(input=node, columns=new_project_columns)) def pull_project_into_join(node: Join, input_index: int) -> None: @@ -290,7 +290,7 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: keys=new_keys, aggregations=new_aggs, ) - return Project(input=agg, columns=new_columns) + return merge_adjacent_projects(Project(input=agg, columns=new_columns)) def simplify_agg( @@ -304,25 +304,138 @@ def simplify_agg( } out_ref: RelationalExpression = ColumnReference(name, agg.data_type) arg: RelationalExpression - if agg.op in (pydop.SUM, pydop.COUNT) and len(agg.inputs) == 1: + + zero_expr: RelationalExpression = LiteralExpression(0, agg.data_type) + one_expr: RelationalExpression = LiteralExpression(1, agg.data_type) + count_star: CallExpression = CallExpression( + op=pydop.COUNT, + return_type=NumericType(), + inputs=[], + ) + + # Can optimize SUM, COUNT and NDISTINCT aggregations on literals. 
+ if ( + agg.op in (pydop.SUM, pydop.COUNT, pydop.NDISTINCT) + and len(agg.inputs) == 1 + and isinstance(agg.inputs[0], LiteralExpression) + ): arg = agg.inputs[0] - if isinstance(arg, LiteralExpression) and isinstance( - arg.data_type, NumericType + if agg.op == pydop.SUM and ( + isinstance(arg.data_type, NumericType) or arg.value is None ): - if (agg.op == pydop.SUM and arg.value == 1) or ( - agg.op == pydop.COUNT and arg.value is not None - ): - return out_ref, CallExpression( - op=pydop.COUNT, + # SUM(NULL) -> NULL + if arg.value is None: + return arg, None + + # SUM(0) -> 0 + elif arg.value == 0: + return zero_expr, None + + # SUM(1) -> COUNT(*) + # SUM(n) = COUNT(*) * n + elif arg.value != 1: + out_ref = CallExpression( + op=pydop.MUL, return_type=agg.data_type, - inputs=[], + inputs=[out_ref, LiteralExpression(arg.value, agg.data_type)], ) + return out_ref, count_star - # If the aggregation is on a key, we can just return the key. - if agg.op in (pydop.SUM, pydop.MIN, pydop.MAX, pydop.ANYTHING): + elif agg.op == pydop.COUNT: + # COUNT(NULL) -> 0 + if arg.value is None: + return zero_expr, None + + # COUNT(n) -> COUNT(*) + else: + return out_ref, count_star + + elif agg.op == pydop.NDISTINCT: + # NDISTINCT(NULL) -> 0 + # NDISTINCT(n) -> 1 + return zero_expr if arg.value is None else one_expr, None + + # SUM(DEFAULT_TO(x, 0)) -> DEFAULT_TO(SUM(x), 0) + if ( + agg.op == pydop.SUM + and len(agg.inputs) == 1 + and isinstance(agg.inputs[0], CallExpression) + ): + if ( + agg.inputs[0].op == pydop.DEFAULT_TO + and isinstance(agg.inputs[0].inputs[1], LiteralExpression) + and isinstance(agg.inputs[0].inputs[1].data_type, NumericType) + and agg.inputs[0].inputs[1].value == 0 + ): + return CallExpression( + pydop.DEFAULT_TO, agg.data_type, [out_ref, zero_expr] + ), CallExpression(pydop.SUM, agg.data_type, [agg.inputs[0].inputs[0]]) + + # If the aggregation is on a key, we can just use the key. 
+ if ( + agg.op + in ( + pydop.SUM, + pydop.MIN, + pydop.MAX, + pydop.ANYTHING, + pydop.AVG, + pydop.QUANTILE, + pydop.MEDIAN, + pydop.COUNT, + pydop.NDISTINCT, + ) + and len(agg.inputs) == 1 + ): arg = agg.inputs[0] if arg in reverse_keys: - return ColumnReference(reverse_keys[arg], agg.data_type), None + key_ref: RelationalExpression = ColumnReference( + reverse_keys[arg], agg.data_type + ) + + # COUNT(key) -> COUNT(*) * INTEGER(PRESENT(key)) + if agg.op == pydop.COUNT: + return CallExpression( + pydop.MUL, + agg.data_type, + [ + out_ref, + CallExpression( + pydop.INTEGER, + NumericType(), + [CallExpression(pydop.PRESENT, BooleanType(), [key_ref])], + ), + ], + ), count_star + + # NDISTINCT(key) -> INTEGER(PRESENT(key)) + if agg.op == pydop.NDISTINCT: + return CallExpression( + pydop.INTEGER, + NumericType(), + [CallExpression(pydop.PRESENT, BooleanType(), [key_ref])], + ), None + + # Otherwise, FUNC(key) -> key + return key_ref, None + + # If running a selection aggregation on a literal, can just return the + # input. + if ( + agg.op + in ( + pydop.MIN, + pydop.MAX, + pydop.ANYTHING, + pydop.AVG, + pydop.MEDIAN, + pydop.QUANTILE, + ) + and len(agg.inputs) >= 1 + ): + arg = agg.inputs[0] + if isinstance(arg, LiteralExpression): + return arg, None # In all other cases, we just return the aggregation as is. 
return out_ref, agg @@ -356,41 +469,67 @@ def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: for agg_name, agg_expr in node.aggregations.items(): match agg_expr.op: case pydop.COUNT if len(agg_expr.inputs) == 0: + # top_keys: {x, y} + # bottom_keys: {x, y} + # COUNT(*) -> ANYTHING(1) if len(bottom_only_keys) == 0: new_aggs[agg_name] = CallExpression( op=pydop.ANYTHING, return_type=agg_expr.data_type, inputs=[LiteralExpression(1, agg_expr.data_type)], ) + + # top_keys: {x, y} + # bottom_keys: {x, y, z} + # COUNT(*) -> NDISTINCT(z) elif len(bottom_only_keys) == 1: new_aggs[agg_name] = CallExpression( op=pydop.NDISTINCT, return_type=agg_expr.data_type, inputs=[next(iter(bottom_only_keys))], ) + + # Otherwise, the merge fails. else: return node + case pydop.SUM: + # SUM(SUM(x)) -> SUM(x) + # SUM(COUNT(x)) -> COUNT(x) input_expr = transpose_expression(agg_expr.inputs[0], input_agg.columns) if isinstance(input_expr, CallExpression) and input_expr.op in ( pydop.SUM, pydop.COUNT, ): new_aggs[agg_name] = input_expr + + # Otherwise, the merge fails. else: return node + case pydop.MIN | pydop.MAX | pydop.ANYTHING: + # MIN(MIN(x)) -> MIN(x) + # MIN(ANYTHING(x)) -> MIN(x) + # MAX(MAX(x)) -> MAX(x) + # MAX(ANYTHING(x)) -> MAX(x) + # ANYTHING(ANYTHING(x)) -> ANYTHING(x) input_expr = transpose_expression(agg_expr.inputs[0], input_agg.columns) - if ( - isinstance(input_expr, CallExpression) - and input_expr.op == agg_expr.op + if isinstance(input_expr, CallExpression) and input_expr.op in ( + agg_expr.op, + pydop.ANYTHING, ): new_aggs[agg_name] = input_expr + + # Otherwise, the merge fails. else: return node + + # Otherwise, the merge fails. case _: return node + # If none of the aggregations caused a merge failure, we can return a new + # Aggregate node using the top keys and the merged aggregation calls. 
return Aggregate( input=input_agg.input, keys=new_keys, diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index f2ff13365..450cdc43c 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -37,6 +37,8 @@ bad_rpad_8, ) from tests.test_pydough_functions.simple_pydough_functions import ( + agg_simplification_1, + agg_simplification_2, cumulative_stock_analysis, exponentiation, find, @@ -1419,6 +1421,127 @@ def get_day_of_week( ), id="window_sliding_frame_relsum", ), + pytest.param( + PyDoughPandasTest( + agg_simplification_1, + "Broker", + lambda: pd.DataFrame( + { + "aug_exchange": [None, 4, 6, 8], + "su1": [3, 4, 10, 4], + "su2": [6, 8, 20, 8], + "su3": [-3, -4, -10, -4], + "su4": [-9, -12, -30, -12], + "su5": [0, 0, 0, 0], + "su6": [1.5, 2.0, 5.0, 2.0], + "su7": [0, 0, 0, 0], + "su8": [0, 4, 6, 8], + "co1": [3, 4, 10, 4], + "co2": [3, 4, 10, 4], + "co3": [3, 4, 10, 4], + "co4": [3, 4, 10, 4], + "co5": [3, 4, 10, 4], + "co6": [3, 4, 10, 4], + "co7": [0, 0, 0, 0], + "co8": [0, 4, 10, 4], + "nd1": [1, 1, 1, 1], + "nd2": [1, 1, 1, 1], + "nd3": [1, 1, 1, 1], + "nd4": [1, 1, 1, 1], + "nd5": [1, 1, 1, 1], + "nd6": [1, 1, 1, 1], + "nd7": [0, 0, 0, 0], + "nd8": [0, 1, 1, 1], + "av1": [1, 1, 1, 1], + "av2": [2, 2, 2, 2], + "av3": [-1, -1, -1, -1], + "av4": [-3, -3, -3, -3], + "av5": [0, 0, 0, 0], + "av6": [0.5, 0.5, 0.5, 0.5], + "av7": [None, None, None, None], + "av8": [None, 4, 6, 8], + "mi1": [1, 1, 1, 1], + "mi2": [2, 2, 2, 2], + "mi3": [-1, -1, -1, -1], + "mi4": [-3, -3, -3, -3], + "mi5": [0, 0, 0, 0], + "mi6": [0.5, 0.5, 0.5, 0.5], + "mi7": [None, None, None, None], + "mi8": [None, 4, 6, 8], + "ma1": [1, 1, 1, 1], + "ma2": [2, 2, 2, 2], + "ma3": [-1, -1, -1, -1], + "ma4": [-3, -3, -3, -3], + "ma5": [0, 0, 0, 0], + "ma6": [0.5, 0.5, 0.5, 0.5], + "ma7": [None, None, None, None], + "ma8": [None, 4, 6, 8], + "an1": [1, 1, 1, 1], + "an2": [2, 2, 2, 2], + "an3": [-1, -1, -1, -1], + "an4": [-3, -3, -3, 
-3], + "an5": [0, 0, 0, 0], + "an6": [0.5, 0.5, 0.5, 0.5], + "an7": [None, None, None, None], + "an8": [None, 4, 6, 8], + "me1": [1.0, 1.0, 1.0, 1.0], + "me2": [2.0, 2.0, 2.0, 2.0], + "me3": [-1.0, -1.0, -1.0, -1.0], + "me4": [-3.0, -3.0, -3.0, -3.0], + "me5": [0.0, 0.0, 0.0, 0.0], + "me6": [0.5, 0.5, 0.5, 0.5], + "me7": [None, None, None, None], + "me8": [None, 4.0, 6.0, 8.0], + "qu1": [1, 1, 1, 1], + "qu2": [2, 2, 2, 2], + "qu3": [-1, -1, -1, -1], + "qu4": [-3, -3, -3, -3], + "qu5": [0, 0, 0, 0], + "qu6": [0.5, 0.5, 0.5, 0.5], + "qu7": [None, None, None, None], + "qu8": [None, 4, 6, 8], + } + ), + "agg_simplification_1", + order_sensitive=True, + ), + id="agg_simplification_1", + ), + pytest.param( + PyDoughPandasTest( + agg_simplification_2, + "Broker", + lambda: pd.DataFrame( + { + "state": ["CA", "FL", "NJ", "NY", "TX"], + "a1": [2, 1, 1, 1, 1], + "a2": [7, 3, 3, 4, 3], + "a3": [1, 0, 0, 3, 0], + "a4": [636307, 99303, 26403, 40008, 225000], + "a5": [ + "555-123-4567", + "555-370-2648", + "555-246-1357", + "555-135-7902", + "555-246-8135", + ], + "a6": [ + "555-864-2319", + "555-864-2319", + "555-987-6543", + "555-987-6543", + "555-753-1904", + ], + "a7": ["ca", "fl", "nj", "ny", "tx"], + "a8": ["ca", "fl", "nj", "ny", "tx"], + "a9": ["ca", "fl", "nj", "ny", "tx"], + } + ), + "agg_simplification_2", + order_sensitive=True, + ), + id="agg_simplification_2", + ), ], ) def defog_custom_pipeline_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_plan_refsols/agg_simplification_1.txt b/tests/test_plan_refsols/agg_simplification_1.txt new file mode 100644 index 000000000..014c1dbdf --- /dev/null +++ b/tests/test_plan_refsols/agg_simplification_1.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('aug_exchange', aug_exchange), ('su1', DEFAULT_TO(count_one, 0:numeric)), ('su2', DEFAULT_TO(count_one * 2:numeric, 0:numeric)), ('su3', DEFAULT_TO(count_one * -1:numeric, 0:numeric)), ('su4', DEFAULT_TO(count_one * -3:numeric, 0:numeric)), ('su5', DEFAULT_TO(0:numeric, 
0:numeric)), ('su6', DEFAULT_TO(count_one * 0.5:numeric, 0:numeric)), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) + AGGREGATE(keys={'aug_exchange': aug_exchange}, aggregations={'agg_63': QUANTILE(aug_exchange, 0.8:numeric), 'count_one': COUNT()}) + PROJECT(columns={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string))}) + SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_plan_refsols/agg_simplification_2.txt 
b/tests/test_plan_refsols/agg_simplification_2.txt new file mode 100644 index 000000000..917655ad9 --- /dev/null +++ b/tests/test_plan_refsols/agg_simplification_2.txt @@ -0,0 +1,5 @@ +ROOT(columns=[('state', sbCustState), ('a1', n_rows), ('a2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('a3', DEFAULT_TO(sum_nj, 0:numeric)), ('a4', DEFAULT_TO(DEFAULT_TO(sum_sz, 0:numeric), 0:numeric)), ('a5', min_min_sbCustPhone), ('a6', max_max_sbCustPhone), ('a7', min_anys), ('a8', max_anys), ('a9', anything_anys)], orderings=[(sbCustState):asc_first]) + AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'anything_anys': ANYTHING(anys), 'max_anys': MAX(anys), 'max_max_sbCustPhone': MAX(max_sbCustPhone), 'min_anys': MIN(anys), 'min_min_sbCustPhone': MIN(min_sbCustPhone), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_nj': SUM(nj), 'sum_sz': SUM(sum_expr_11)}) + PROJECT(columns={'anys': anything_expr_9, 'max_sbCustPhone': max_sbCustPhone, 'min_sbCustPhone': min_sbCustPhone, 'n_rows': n_rows, 'nj': count_expr_10, 'sbCustState': sbCustState, 'sum_expr_11': sum_expr_11}) + AGGREGATE(keys={'sbCustCity': sbCustCity, 'sbCustState': sbCustState}, aggregations={'anything_expr_9': ANYTHING(LOWER(sbCustState)), 'count_expr_10': COUNT(KEEP_IF(sbCustName, STARTSWITH(LOWER(sbCustName), 'j':string))), 'max_sbCustPhone': MAX(sbCustPhone), 'min_sbCustPhone': MIN(sbCustPhone), 'n_rows': COUNT(), 'sum_expr_11': SUM(INTEGER(sbCustPostalCode))}) + SCAN(table=main.sbCustomer, columns={'sbCustCity': sbCustCity, 'sbCustName': sbCustName, 'sbCustPhone': sbCustPhone, 'sbCustPostalCode': sbCustPostalCode, 'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 1d8f376f1..43a1d3b81 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,14 +1,15 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', 
ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': 
l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + PROJECT(columns={'anything_ps_partkey': ps_partkey, 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 
'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index df3d64e66..2e100b381 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,14 +1,15 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': 
t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + PROJECT(columns={'anything_ps_partkey': ps_partkey, 'sum_l_quantity': sum_l_quantity, 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 333521c0c..cc7064f09 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,9 +1,9 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': 
month, 'year': year}) JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) - FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(DEFAULT_TO(sum_o_totalprice, 0:numeric))}) + FILTER(condition=DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric) > next_year_total_spent, columns={'year': year}) + PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(sum_o_totalprice)}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index df78044d3..050999da6 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], 
orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) - AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) +ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) + AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 0c76e958a..314981554 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,11 +1,12 @@ ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': 
t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_rows': n_rows, 'sum_n_incidents': DEFAULT_TO(sum_n_incidents, 0:numeric)}) + AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + 
AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index fdf768b85..a64d4f50c 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) - AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) +ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) + AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index 19a48c5d6..d27e1aa5b 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last], 
limit=5:numeric) +ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'de_product_id': t0.de_product_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 40729db86..7ed47dc5e 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -2983,3 +2983,108 @@ def quantile_function_test_4(): orders_99_percent=QUANTILE(selected_orders.total_price, 0.99), orders_max=QUANTILE(selected_orders.total_price, 1.0), ) + + +def agg_simplification_1(): + # TODO + kwargs = {} + args = [ + tickers.one, + tickers.two, + tickers.negative_one, + tickers.negative_three, + tickers.zero, + tickers.half, + tickers.null, + 
tickers.aug_exchange, + ] + functions = [ + ("su", SUM), + ("co", COUNT), + ("nd", NDISTINCT), + ("av", AVG), + ("mi", MIN), + ("ma", MAX), + ("an", ANYTHING), + ("me", MEDIAN), + ] + for prefix, func in functions: + for idx, arg in enumerate(args): + kwargs[f"{prefix}{idx + 1}"] = func(arg) + for idx, arg in enumerate(args): + kwargs[f"qu{idx + 1}"] = QUANTILE(arg, (idx + 1) / 10) + return ( + tickers.CALCULATE( + aug_exchange=LENGTH(KEEP_IF(exchange, exchange != "NYSE Arca")) + ) + .CALCULATE( + one=1, + two=2, + negative_one=-1, + negative_three=-3, + zero=0, + half=0.5, + null=None, + ) + .PARTITION(name="exchanges", by=aug_exchange) + .CALCULATE( + aug_exchange, + **kwargs, + ) + .ORDER_BY(aug_exchange.ASC()) + ) + + +""" +SELECT + LENGTH(NULLIF(sbTickerExchange, 'NYSE Arca')) AS aug_exchange, + COUNT(*) +from main.sbticker +GROUP BY 1 +ORDER BY 1 +; + +|3 +NASDAQ|10 +NYSE|4 +Vanguard|4 +[None, 4, 6, 8] +[3, 10, 4, 4] +""" + + +def agg_simplification_2(): + # TODO + return ( + customers.PARTITION(name="cities", by=(city, state)) + .CALCULATE( + n=COUNT(customers), + nj=COUNT(KEEP_IF(customers.name, STARTSWITH(LOWER(customers.name), "j"))), + sz=SUM(INTEGER(customers.postal_code)), + minp=MIN(customers.phone), + maxp=MAX(customers.phone), + anys=ANYTHING(LOWER(customers.state)), + ) + .PARTITION(name="states", by=state) + .CALCULATE( + state, + a1=COUNT(cities), + a2=SUM(cities.n), + a3=SUM(cities.nj), + a4=SUM(cities.sz), + a5=MIN(cities.minp), + a6=MAX(cities.maxp), + a7=MIN(cities.anys), + a8=MAX(cities.anys), + a9=ANYTHING(cities.anys), + ) + .ORDER_BY(state.ASC()) + ) + + +""" +SELECT sbCustState, sbCustCity, COUNT(*), COUNT(CASE WHEN LOWER(sbCustName) LIKE 'j%' THEN 1 END), SUM(CAST(sbCustPostalCode AS INTEGER)), MIN(sbCustPhone), MAX(sbCustPhone), MAX(LOWER(sbCustState)) +FROM main.sbcustomer +GROUP BY 1, 2 +ORDER BY 1, 2; +""" diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql 
b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql index 954197d6d..e665da566 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql @@ -11,7 +11,7 @@ WITH _s7 AS ( SELECT countries.co_name AS country_name, products.pr_name AS product_name, - ROUND(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) / COUNT(*), 2) AS ir + ROUND(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) / COUNT(*), 2) AS ir FROM main.countries AS countries JOIN main.devices AS devices ON countries.co_id = devices.de_production_country_id @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ROUND(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) / COUNT(*), 2) DESC, + ROUND(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql index 5462388f5..8c6123891 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql @@ -11,7 +11,7 @@ WITH _s7 AS ( SELECT countries.co_name AS country_name, products.pr_name AS product_name, - ROUND(CAST(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) AS REAL) / COUNT(*), 2) AS ir + ROUND(CAST(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) AS REAL) / COUNT(*), 2) AS ir FROM main.countries AS countries JOIN main.devices AS devices ON countries.co_id = devices.de_production_country_id @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ROUND(CAST(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) AS REAL) / COUNT(*), 2) DESC, + ROUND(CAST(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) AS REAL) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git 
a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql index 99187ac49..103672272 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql @@ -7,9 +7,9 @@ WITH _s3 AS ( in_device_id ), _s5 AS ( SELECT - COUNT(*) AS n_rows, - SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, - devices.de_production_country_id + COALESCE(SUM(_s3.n_rows), 0) AS sum_n_incidents, + devices.de_production_country_id, + COUNT(*) AS n_rows FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql index 3180a6be4..a6b037b5e 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql @@ -7,9 +7,9 @@ WITH _s3 AS ( in_device_id ), _s5 AS ( SELECT - COUNT(*) AS n_rows, - SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, - devices.de_production_country_id + COALESCE(SUM(_s3.n_rows), 0) AS sum_n_incidents, + devices.de_production_country_id, + COUNT(*) AS n_rows FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' diff --git a/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql index 96b348eee..ad09ee111 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ) SELECT products.pr_brand AS 
brand, - ROUND(COALESCE(SUM(COALESCE(_s3.n_rows, 0)), 0) / COUNT(*), 2) AS ir + ROUND(COALESCE(COALESCE(SUM(_s3.n_rows), 0), 0) / COUNT(*), 2) AS ir FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql index bf9decd5e..864caeee4 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ) SELECT products.pr_brand AS brand, - ROUND(CAST(COALESCE(SUM(COALESCE(_s3.n_rows, 0)), 0) AS REAL) / COUNT(*), 2) AS ir + ROUND(CAST(COALESCE(COALESCE(SUM(_s3.n_rows), 0), 0) AS REAL) / COUNT(*), 2) AS ir FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql index 276282dbe..572bff6d9 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ), _s5 AS ( SELECT COUNT(*) AS n_rows, - SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, + SUM(_s3.n_rows) AS sum_n_incidents, devices.de_product_id FROM main.devices AS devices JOIN main.products AS products @@ -22,10 +22,10 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, products.pr_type AS product_type, - ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) AS ir + ROUND(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) DESC + ROUND(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) / _s5.n_rows, 
2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql index 60fd910bb..4f0d45267 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ), _s5 AS ( SELECT COUNT(*) AS n_rows, - SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, + SUM(_s3.n_rows) AS sum_n_incidents, devices.de_product_id FROM main.devices AS devices JOIN main.products AS products @@ -22,10 +22,10 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, products.pr_type AS product_type, - ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) AS ir + ROUND(CAST(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) AS REAL) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) DESC + ROUND(CAST(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) AS REAL) / _s5.n_rows, 2) DESC LIMIT 5 From 856e1a9e12539e73fbc5bb23cc16a1f938c628df Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 13:51:56 -0400 Subject: [PATCH 023/143] Adjusting parameters of optimization --- pydough/conversion/relational_converter.py | 10 +- .../agg_simplification_2.txt | 8 +- tests/test_plan_refsols/aggregate_semi.txt | 4 +- .../aggregation_analytics_2.txt | 25 +- .../aggregation_analytics_3.txt | 25 +- tests/test_plan_refsols/common_prefix_c.txt | 31 ++- tests/test_plan_refsols/common_prefix_d.txt | 35 ++- tests/test_plan_refsols/common_prefix_h.txt | 33 ++- tests/test_plan_refsols/common_prefix_o.txt | 4 +- .../month_year_sliding_windows.txt | 11 +- ...ple_simple_aggregations_multiple_calcs.txt | 8 +- .../test_plan_refsols/supplier_best_part.txt | 4 +- .../technograph_monthly_incident_rate.txt | 49 ++-- 
...umulative_incident_rate_goldcopperstar.txt | 33 ++- ..._year_cumulative_incident_rate_overall.txt | 21 +- tests/test_plan_refsols/tpch_q7.txt | 25 +- .../various_aggfuncs_simple.txt | 4 +- .../simple_pydough_functions.py | 42 +-- tests/test_pydough_to_sql.py | 14 + .../agg_simplification_1_ansi.sql | 87 +++++++ .../agg_simplification_1_sqlite.sql | 239 ++++++++++++++++++ .../agg_simplification_2_ansi.sql | 16 ++ .../agg_simplification_2_sqlite.sql | 16 ++ .../defog_dealership_basic5_ansi.sql | 6 +- .../defog_dealership_basic5_sqlite.sql | 6 +- 25 files changed, 550 insertions(+), 206 deletions(-) create mode 100644 tests/test_sql_refsols/agg_simplification_1_ansi.sql create mode 100644 tests/test_sql_refsols/agg_simplification_1_sqlite.sql create mode 100644 tests/test_sql_refsols/agg_simplification_2_ansi.sql create mode 100644 tests/test_sql_refsols/agg_simplification_2_sqlite.sql diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index ac67d9852..09ed9802b 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1443,12 +1443,10 @@ def optimize_relational_tree( # possible. root = bubble_column_names(root) - # Step 8: run projection pullup. - # print() - # print(root.to_tree_string()) - root = confirm_root(pullup_projections(root)) - # print() - # print(root.to_tree_string()) + # Step 8: run projection pullup followed by column pruning 2x. 
+ for _ in range(2): + root = confirm_root(pullup_projections(root)) + root = ColumnPruner().prune_unused_columns(root) # Step 9: re-run filter pushdown root._input = push_filters(root.input, set()) diff --git a/tests/test_plan_refsols/agg_simplification_2.txt b/tests/test_plan_refsols/agg_simplification_2.txt index 917655ad9..c1121462b 100644 --- a/tests/test_plan_refsols/agg_simplification_2.txt +++ b/tests/test_plan_refsols/agg_simplification_2.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('state', sbCustState), ('a1', n_rows), ('a2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('a3', DEFAULT_TO(sum_nj, 0:numeric)), ('a4', DEFAULT_TO(DEFAULT_TO(sum_sz, 0:numeric), 0:numeric)), ('a5', min_min_sbCustPhone), ('a6', max_max_sbCustPhone), ('a7', min_anys), ('a8', max_anys), ('a9', anything_anys)], orderings=[(sbCustState):asc_first]) - AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'anything_anys': ANYTHING(anys), 'max_anys': MAX(anys), 'max_max_sbCustPhone': MAX(max_sbCustPhone), 'min_anys': MIN(anys), 'min_min_sbCustPhone': MIN(min_sbCustPhone), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_nj': SUM(nj), 'sum_sz': SUM(sum_expr_11)}) - PROJECT(columns={'anys': anything_expr_9, 'max_sbCustPhone': max_sbCustPhone, 'min_sbCustPhone': min_sbCustPhone, 'n_rows': n_rows, 'nj': count_expr_10, 'sbCustState': sbCustState, 'sum_expr_11': sum_expr_11}) - AGGREGATE(keys={'sbCustCity': sbCustCity, 'sbCustState': sbCustState}, aggregations={'anything_expr_9': ANYTHING(LOWER(sbCustState)), 'count_expr_10': COUNT(KEEP_IF(sbCustName, STARTSWITH(LOWER(sbCustName), 'j':string))), 'max_sbCustPhone': MAX(sbCustPhone), 'min_sbCustPhone': MIN(sbCustPhone), 'n_rows': COUNT(), 'sum_expr_11': SUM(INTEGER(sbCustPostalCode))}) - SCAN(table=main.sbCustomer, columns={'sbCustCity': sbCustCity, 'sbCustName': sbCustName, 'sbCustPhone': sbCustPhone, 'sbCustPostalCode': sbCustPostalCode, 'sbCustState': sbCustState}) +ROOT(columns=[('state', sbCustState), ('a1', n_rows), ('a2', DEFAULT_TO(sum_n_rows, 
0:numeric)), ('a3', DEFAULT_TO(sum_nj, 0:numeric)), ('a4', DEFAULT_TO(DEFAULT_TO(sum_sz, 0:numeric), 0:numeric)), ('a5', min_min_sbCustPhone), ('a6', max_max_sbCustPhone), ('a7', min_anys), ('a8', min_anys), ('a9', min_anys)], orderings=[(sbCustState):asc_first]) + AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'max_max_sbCustPhone': MAX(sbCustPhone), 'min_anys': ANYTHING(LOWER(sbCustState)), 'min_min_sbCustPhone': MIN(sbCustPhone), 'n_rows': NDISTINCT(sbCustCity), 'sum_n_rows': COUNT(), 'sum_nj': COUNT(KEEP_IF(sbCustName, STARTSWITH(LOWER(sbCustName), 'j':string))), 'sum_sz': SUM(INTEGER(sbCustPostalCode))}) + SCAN(table=main.sbCustomer, columns={'sbCustCity': sbCustCity, 'sbCustName': sbCustName, 'sbCustPhone': sbCustPhone, 'sbCustPostalCode': sbCustPostalCode, 'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 8b74b522a..7e7f2a981 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(n_rows, 0:numeric)), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': 
ps_suppkey}, aggregations={'avg_p_retailprice_1': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 43a1d3b81..1d8f376f1 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,15 +1,14 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - PROJECT(columns={'anything_ps_partkey': ps_partkey, 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 2e100b381..df3d64e66 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,15 +1,14 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - 
PROJECT(columns={'anything_ps_partkey': ps_partkey, 'sum_l_quantity': sum_l_quantity, 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': 
l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index c7f3f076b..3b51f1f96 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,18 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_sum_sum_expr_18_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': 
t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows_1': SUM(sum_sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': 
t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index a9e20fe8d..498795765 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,24 +1,23 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', DEFAULT_TO(sum_sum_expr_7, 0:numeric)), ('n_orders_95', DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 
'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_29': SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_2': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_29': SUM(agg_29), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index fc806c3fc..2001b4675 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,18 +1,17 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', 
sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0_1)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': t1.sum_sum_sum_expr_18_0_1, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_0, 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 
'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': COUNT(), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0_1': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows_1': SUM(sum_sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': 
COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 808be5ed1..056f09af0 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,6 +1,6 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 
'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index cc7064f09..41ba7be61 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,12 +1,11 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) 
FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) + JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) FILTER(condition=DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric) > next_year_total_spent, columns={'year': year}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(sum_o_totalprice)}) - AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 
'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index 2186c653c..afbab2010 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': 
t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal_1': AVG(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal_1': AVG(s_acctbal), 'max_s_acctbal_1': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 5dbeb3b2f..f0f4f0fb3 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -7,9 +7,9 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows_1': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 26d7d259d..0bb678bcf 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,28 +1,27 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) - AGGREGATE(keys={'month': MONTH(ca_dt), 'year': year}, 
aggregations={'sum_expr_3': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1, 'year': year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'month': MONTH(ca_dt), 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 
'n_rows': t1.n_rows, 'year': t0.year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - 
SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index f51c8a594..613013975 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -4,24 +4,23 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROU AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': 
pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) - AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_4': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) 
AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git 
a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index dd5c32202..45394da84 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,16 +1,15 @@ ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) - AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, 
aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/tpch_q7.txt b/tests/test_plan_refsols/tpch_q7.txt index 8be0f5377..a98bd9b69 100644 --- a/tests/test_plan_refsols/tpch_q7.txt +++ b/tests/test_plan_refsols/tpch_q7.txt @@ -1,16 +1,15 @@ ROOT(columns=[('SUPP_NATION', n_name), ('CUST_NATION', cust_nation), ('L_YEAR', l_year), 
('REVENUE', DEFAULT_TO(sum_volume, 0:numeric))], orderings=[(n_name):asc_first, (cust_nation):asc_first, (l_year):asc_first]) AGGREGATE(keys={'cust_nation': cust_nation, 'l_year': YEAR(l_shipdate), 'n_name': n_name}, aggregations={'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) - PROJECT(columns={'cust_nation': name_8, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name}) - FILTER(condition=n_name == 'FRANCE':string & name_8 == 'GERMANY':string | n_name == 'GERMANY':string & name_8 == 'FRANCE':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name, 'name_8': name_8}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t0.n_name, 'name_8': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'FRANCE':string & name_8 == 'GERMANY':string | n_name == 'GERMANY':string & name_8 == 'FRANCE':string, columns={'cust_nation': name_8, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t0.n_name, 'name_8': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/various_aggfuncs_simple.txt b/tests/test_plan_refsols/various_aggfuncs_simple.txt index 44a91ea4a..49fed6fa7 100644 --- a/tests/test_plan_refsols/various_aggfuncs_simple.txt +++ b/tests/test_plan_refsols/various_aggfuncs_simple.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', DEFAULT_TO(avg_c_acctbal, 0:numeric)), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal': COUNT(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + 
AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal_1': COUNT(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'min_c_acctbal_1': MIN(c_acctbal), 'n_rows_1': COUNT(), 'sum_c_acctbal_1': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 7ed47dc5e..3df9ca79a 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -2986,7 +2986,11 @@ def quantile_function_test_4(): def agg_simplification_1(): - # TODO + # Partition the tickers on the value + # `LENGTH(KEEP_IF(exchange, exchange != "NYSE Arca"))`, then for every + # combination of 1, 2, -1, -3, 0, 0.5, null, and the partition key, call + # the aggregation functions SUM, COUNT, NDISTINCT, AVG, MIN, MAX, + # ANYTHING, and MEDIAN, and QUANTILE on each of the inputs. kwargs = {} args = [ tickers.one, @@ -3035,26 +3039,16 @@ def agg_simplification_1(): ) -""" -SELECT - LENGTH(NULLIF(sbTickerExchange, 'NYSE Arca')) AS aug_exchange, - COUNT(*) -from main.sbticker -GROUP BY 1 -ORDER BY 1 -; - -|3 -NASDAQ|10 -NYSE|4 -Vanguard|4 -[None, 4, 6, 8] -[3, 10, 4, 4] -""" - - def agg_simplification_2(): - # TODO + # Partition the customers by city/state then by state to compute the + # following aggregations per-state: + # 1. Number of cities pers state + # 2. Total number of customers per state + # 3. Total postal code sum per state + # 4. Total number of customers with names starting with "j" per state + # 5. Minimum phone number per state + # 6. 
Maximum phone number per state + # 7-9: Convoluted ways to pass around the lowercase state name return ( customers.PARTITION(name="cities", by=(city, state)) .CALCULATE( @@ -3080,11 +3074,3 @@ def agg_simplification_2(): ) .ORDER_BY(state.ASC()) ) - - -""" -SELECT sbCustState, sbCustCity, COUNT(*), COUNT(CASE WHEN LOWER(sbCustName) LIKE 'j%' THEN 1 END), SUM(CAST(sbCustPostalCode AS INTEGER)), MIN(sbCustPhone), MAX(sbCustPhone), MAX(LOWER(sbCustState)) -FROM main.sbcustomer -GROUP BY 1, 2 -ORDER BY 1, 2; -""" diff --git a/tests/test_pydough_to_sql.py b/tests/test_pydough_to_sql.py index 00530b6a4..7af97012f 100644 --- a/tests/test_pydough_to_sql.py +++ b/tests/test_pydough_to_sql.py @@ -26,6 +26,8 @@ window_functions, ) from tests.test_pydough_functions.simple_pydough_functions import ( + agg_simplification_1, + agg_simplification_2, cumulative_stock_analysis, datediff, datetime_sampler, @@ -236,6 +238,18 @@ def test_pydough_to_sql_tpch( "Broker", id="week_offset", ), + pytest.param( + agg_simplification_1, + "agg_simplification_1", + "Broker", + id="agg_simplification_1", + ), + pytest.param( + agg_simplification_2, + "agg_simplification_2", + "Broker", + id="agg_simplification_2", + ), pytest.param( cumulative_stock_analysis, "cumulative_stock_analysis", diff --git a/tests/test_sql_refsols/agg_simplification_1_ansi.sql b/tests/test_sql_refsols/agg_simplification_1_ansi.sql new file mode 100644 index 000000000..6c346edc0 --- /dev/null +++ b/tests/test_sql_refsols/agg_simplification_1_ansi.sql @@ -0,0 +1,87 @@ +SELECT + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, + COALESCE(COUNT(*), 0) AS su1, + COALESCE(COUNT(*) * 2, 0) AS su2, + COALESCE(COUNT(*) * -1, 0) AS su3, + COALESCE(COUNT(*) * -3, 0) AS su4, + 0 AS su5, + COALESCE(COUNT(*) * 0.5, 0) AS su6, + COALESCE(NULL, 0) AS su7, + COALESCE( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), + 0 + ) AS su8, + COUNT(*) AS 
co1, + COUNT(*) AS co2, + COUNT(*) AS co3, + COUNT(*) AS co4, + COUNT(*) AS co5, + COUNT(*) AS co6, + 0 AS co7, + COUNT(*) * CAST(NOT ( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) IS NULL + ) AS BIGINT) AS co8, + 1 AS nd1, + 1 AS nd2, + 1 AS nd3, + 1 AS nd4, + 1 AS nd5, + 1 AS nd6, + 0 AS nd7, + CAST(NOT ( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) IS NULL + ) AS BIGINT) AS nd8, + 1 AS av1, + 2 AS av2, + -1 AS av3, + -3 AS av4, + 0 AS av5, + 0.5 AS av6, + NULL AS av7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS av8, + 1 AS mi1, + 2 AS mi2, + -1 AS mi3, + -3 AS mi4, + 0 AS mi5, + 0.5 AS mi6, + NULL AS mi7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS mi8, + 1 AS ma1, + 2 AS ma2, + -1 AS ma3, + -3 AS ma4, + 0 AS ma5, + 0.5 AS ma6, + NULL AS ma7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS ma8, + 1 AS an1, + 2 AS an2, + -1 AS an3, + -3 AS an4, + 0 AS an5, + 0.5 AS an6, + NULL AS an7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS an8, + 1 AS me1, + 2 AS me2, + -1 AS me3, + -3 AS me4, + 0 AS me5, + 0.5 AS me6, + NULL AS me7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS me8, + 1 AS qu1, + 2 AS qu2, + -1 AS qu3, + -3 AS qu4, + 0 AS qu5, + 0.5 AS qu6, + NULL AS qu7, + PERCENTILE_DISC(0.8) WITHIN GROUP (ORDER BY + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) NULLS LAST) AS qu8 +FROM main.sbticker +GROUP BY + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) +ORDER BY + aug_exchange diff --git a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql new file mode 100644 index 000000000..4716a0c4c --- /dev/null +++ 
b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql @@ -0,0 +1,239 @@ +WITH _t1 AS ( + SELECT + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 1 DESC) - 1.0 + ) - ( + CAST(( + COUNT(1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN 1 + ELSE NULL + END AS expr_72, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 2 DESC) - 1.0 + ) - ( + CAST(( + COUNT(2) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN 2 + ELSE NULL + END AS expr_73, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY -1 DESC) - 1.0 + ) - ( + CAST(( + COUNT(-1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN -1 + ELSE NULL + END AS expr_74, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY -3 DESC) - 1.0 + ) - ( + CAST(( + COUNT(-3) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN -3 + ELSE NULL + END AS expr_75, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 0 DESC) - 1.0 + ) - ( + CAST(( + COUNT(0) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN 0 + ELSE NULL + END AS expr_76, + 
CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 0.5 DESC) - 1.0 + ) - ( + CAST(( + COUNT(0.5) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN 0.5 + ELSE NULL + END AS expr_77, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY NULL DESC) - 1.0 + ) - ( + CAST(( + COUNT(NULL) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN NULL + ELSE NULL + END AS expr_78, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) DESC) - 1.0 + ) - ( + CAST(( + COUNT( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + ) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + ELSE NULL + END AS expr_79, + CASE + WHEN CAST(0.9 * COUNT(1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 1 DESC) + THEN 1 + ELSE NULL + END AS expr_80, + CASE + WHEN CAST(0.8 * COUNT(2) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL 
END) ORDER BY 2 DESC) + THEN 2 + ELSE NULL + END AS expr_81, + CASE + WHEN CAST(0.7 * COUNT(-1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY -1 DESC) + THEN -1 + ELSE NULL + END AS expr_82, + CASE + WHEN CAST(0.6 * COUNT(-3) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY -3 DESC) + THEN -3 + ELSE NULL + END AS expr_83, + CASE + WHEN CAST(0.5 * COUNT(0) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 0 DESC) + THEN 0 + ELSE NULL + END AS expr_84, + CASE + WHEN CAST(0.4 * COUNT(0.5) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 0.5 DESC) + THEN 0.5 + ELSE NULL + END AS expr_85, + CASE + WHEN CAST(0.30000000000000004 * COUNT(NULL) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY NULL DESC) + THEN NULL + ELSE NULL + END AS expr_86, + CASE + WHEN CAST(0.19999999999999996 * COUNT( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + ) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < 
ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) DESC) + THEN LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + ELSE NULL + END AS expr_87, + sbtickerexchange + FROM main.sbticker +) +SELECT + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, + COALESCE(COUNT(*), 0) AS su1, + COALESCE(COUNT(*) * 2, 0) AS su2, + COALESCE(COUNT(*) * -1, 0) AS su3, + COALESCE(COUNT(*) * -3, 0) AS su4, + 0 AS su5, + COALESCE(COUNT(*) * 0.5, 0) AS su6, + COALESCE(NULL, 0) AS su7, + COALESCE( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), + 0 + ) AS su8, + COUNT(*) AS co1, + COUNT(*) AS co2, + COUNT(*) AS co3, + COUNT(*) AS co4, + COUNT(*) AS co5, + COUNT(*) AS co6, + 0 AS co7, + COUNT(*) * CAST(NOT ( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) IS NULL + ) AS INTEGER) AS co8, + 1 AS nd1, + 1 AS nd2, + 1 AS nd3, + 1 AS nd4, + 1 AS nd5, + 1 AS nd6, + 0 AS nd7, + CAST(NOT ( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) IS NULL + ) AS INTEGER) AS nd8, + 1 AS av1, + 2 AS av2, + -1 AS av3, + -3 AS av4, + 0 AS av5, + 0.5 AS av6, + NULL AS av7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS av8, + 1 AS mi1, + 2 AS mi2, + -1 AS mi3, + -3 AS mi4, + 0 AS mi5, + 0.5 AS mi6, + NULL AS mi7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS mi8, + 1 AS ma1, + 2 AS ma2, + -1 AS ma3, + -3 AS ma4, + 0 AS ma5, + 0.5 AS ma6, + NULL AS ma7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS ma8, + 1 AS an1, + 2 AS an2, + -1 AS an3, + -3 AS an4, + 0 AS an5, + 0.5 AS an6, + NULL AS an7, + LENGTH(CASE WHEN 
sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS an8, + AVG(expr_72) AS me1, + AVG(expr_73) AS me2, + AVG(expr_74) AS me3, + AVG(expr_75) AS me4, + AVG(expr_76) AS me5, + AVG(expr_77) AS me6, + AVG(expr_78) AS me7, + AVG(expr_79) AS me8, + MAX(expr_80) AS qu1, + MAX(expr_81) AS qu2, + MAX(expr_82) AS qu3, + MAX(expr_83) AS qu4, + MAX(expr_84) AS qu5, + MAX(expr_85) AS qu6, + MAX(expr_86) AS qu7, + MAX(expr_87) AS qu8 +FROM _t1 +GROUP BY + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) +ORDER BY + aug_exchange diff --git a/tests/test_sql_refsols/agg_simplification_2_ansi.sql b/tests/test_sql_refsols/agg_simplification_2_ansi.sql new file mode 100644 index 000000000..d36f0482c --- /dev/null +++ b/tests/test_sql_refsols/agg_simplification_2_ansi.sql @@ -0,0 +1,16 @@ +SELECT + sbcuststate AS state, + COUNT(DISTINCT sbcustcity) AS a1, + COALESCE(COUNT(*), 0) AS a2, + COALESCE(COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END), 0) AS a3, + COALESCE(COALESCE(SUM(CAST(sbcustpostalcode AS BIGINT)), 0), 0) AS a4, + MIN(sbcustphone) AS a5, + MAX(sbcustphone) AS a6, + ANY_VALUE(LOWER(sbcuststate)) AS a7, + ANY_VALUE(LOWER(sbcuststate)) AS a8, + ANY_VALUE(LOWER(sbcuststate)) AS a9 +FROM main.sbcustomer +GROUP BY + sbcuststate +ORDER BY + sbcuststate diff --git a/tests/test_sql_refsols/agg_simplification_2_sqlite.sql b/tests/test_sql_refsols/agg_simplification_2_sqlite.sql new file mode 100644 index 000000000..deee0c7a6 --- /dev/null +++ b/tests/test_sql_refsols/agg_simplification_2_sqlite.sql @@ -0,0 +1,16 @@ +SELECT + sbcuststate AS state, + COUNT(DISTINCT sbcustcity) AS a1, + COALESCE(COUNT(*), 0) AS a2, + COALESCE(COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END), 0) AS a3, + COALESCE(COALESCE(SUM(CAST(sbcustpostalcode AS INTEGER)), 0), 0) AS a4, + MIN(sbcustphone) AS a5, + MAX(sbcustphone) AS a6, + MAX(LOWER(sbcuststate)) AS a7, + MAX(LOWER(sbcuststate)) AS a8, + 
MAX(LOWER(sbcuststate)) AS a9 +FROM main.sbcustomer +GROUP BY + sbcuststate +ORDER BY + sbcuststate diff --git a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql index 4239e292e..9a9471dea 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS n_rows, + COUNT(*) AS n_rows_1, SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales @@ -12,11 +12,11 @@ WITH _s1 AS ( SELECT salespersons.first_name, salespersons.last_name, - _s1.n_rows AS total_sales, + _s1.n_rows_1 AS total_sales, COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - _s1.n_rows DESC + _s1.n_rows_1 DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql index 796cac46a..9f797c2bc 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS n_rows, + COUNT(*) AS n_rows_1, SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales @@ -14,11 +14,11 @@ WITH _s1 AS ( SELECT salespersons.first_name, salespersons.last_name, - _s1.n_rows AS total_sales, + _s1.n_rows_1 AS total_sales, COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - _s1.n_rows DESC + _s1.n_rows_1 DESC LIMIT 5 From c4298cb4599d0e2cdbc584edafa90f87884844d8 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 21:56:09 -0400 Subject: [PATCH 024/143] Pulled out common logic from filter/join/limit and added comments --- pydough/conversion/projection_pullup.py | 260 +++++++++++++++++------- 1 file changed, 183 
insertions(+), 77 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index a24f2e6c3..80e267b84 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -41,22 +41,46 @@ def widen_columns( node: RelationalNode, ) -> dict[RelationalExpression, RelationalExpression]: """ - TODO + Modifies a relational node in-place to ensure every column in the node's + inputs is also present in the node's output columns. Returns a substitution + mapping such that any expression pulled into the parent of the node can be + transformed to point to the node's output columns. + + Args: + `node`: The relational node to "widen" by adding more columns to. + + Returns: + A mapping that can be used for substitution if expressions from the + node are pulled up into the parent of the node. """ + # The substitution mapping that will be built by the functiona nd returned + # to the calling site. + substitutions: dict[RelationalExpression, RelationalExpression] = {} + + # Mapping of every expression in the node's columns to a reference to the + # column of the node that points to it. This is used to keep track of which + # expressions are already present in the node's columns versus the ones that + # should be added to un-prune the node. existing_vals: dict[RelationalExpression, RelationalExpression] = { expr: ColumnReference(name, expr.data_type) for name, expr in node.columns.items() } - substitutions: dict[RelationalExpression, RelationalExpression] = {} + + # Pull all the columns from each input to the node into the node's output + # columns if they are not already in the node's output columns. for input_idx in range(len(node.inputs)): input_alias: str | None = node.default_input_aliases[input_idx] input_node: RelationalNode = node.inputs[input_idx] for name, expr in input_node.columns.items(): + # If the current node is a Join, add input names to the expression. 
if isinstance(node, Join): expr = add_input_name(expr, input_alias) ref_expr: ColumnReference = ColumnReference( name, expr.data_type, input_name=input_alias ) + # If the expression is not already in the node's columns, then + # inject it so the node can use it later if a pull-up occurs that + # would need to reference this expression. if expr not in existing_vals: new_name: str = name idx: int = 0 @@ -69,16 +93,35 @@ def widen_columns( substitutions[ref_expr] = new_ref else: substitutions[ref_expr] = existing_vals[expr] + + # Return the substitution mapping, without any no-op substitutions return {k: v for k, v in substitutions.items() if k != v} -def pull_non_columns(node: RelationalNode) -> RelationalNode: +def pull_non_columns(node: Join | Filter | Limit) -> RelationalNode: """ - TODO + Pulls up non-column expressions from the output columns of a Join, Filter, + or Limit node into a parent projection. + + Args: + `node`: The Join, Filter, or Limit node to pull up non-column expressions + from. + + Returns: + Either the original node if this rewrite is not applicable, or a + project node that contains the non-column expressions pulled up from + the output columns of the node, pointing to `node` as its input. """ + # The columns that will be used in the parent projection. new_project_columns: dict[str, RelationalExpression] = {} + + # A boolean to indicate if any columns were pulled besides no-ops. If this + # never becomes true, then we skip the rewrite and return the original. needs_pull: bool = False + # Iterate through the columns of the node and check if they are column + # references or not. If they are not, then we need to pull them up into + # the parent projection. 
for name, expr in node.columns.items(): if isinstance(expr, ColumnReference): new_project_columns[name] = ColumnReference(name, expr.data_type) @@ -86,59 +129,144 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: new_project_columns[name] = expr needs_pull = True + # Skip the rewrite if no columns were pulled up. if not needs_pull: return node + # Ensure every column in the node's inputs is also present in the output + # columns of the node. This will ensure that any function calls that are + # pulled into a parent projection can have their inputs substituted with + # references to the node's output columns. Ensure the substitutions do not + # have any input names in the values. substitutions: dict[RelationalExpression, RelationalExpression] = widen_columns( node ) substitutions = {k: add_input_name(v, None) for k, v in substitutions.items()} + + # Create the columns of the new projection by applying the substitutions + # to the expressions pulled up earlier. for name, expr in new_project_columns.items(): new_project_columns[name] = apply_substitution(expr, substitutions, {}) - return merge_adjacent_projects(Project(input=node, columns=new_project_columns)) + # Build the new project node pointing to the input but with the new columns. + return Project(input=node, columns=new_project_columns) -def pull_project_into_join(node: Join, input_index: int) -> None: +def pull_project_helper( + columns: dict[str, RelationalExpression], + conditions: list[RelationalExpression], + ordering: list[ExpressionSortInfo], + project: Project, + input_name: str | None, +) -> dict[RelationalExpression, RelationalExpression]: """ - TODO + Main helper utility for pulling up columns from a Project node into a + a parent Filter/Join/Limit node. 
This function modifies the input project + in-place to ensure every column in the project's inputs is available + to the parent node, and returns a mapping of expressions that can be used + to substitute the columns in the parent node's output columns or conditions. + + Args: + `columns`: The columns of the parent node that the expressions from the + project node can be pulled into. + `conditions`: The condition of the parent node that the expressions + from the project node can be pulled into. This is a list so that + nodes without a condition can pass it in as empty. + `ordering`: The orderings of the parent node that the expressions from + the project node can be pulled into. This is a list so that nodes + without orderings can pass it in as empty. + `project`: The Project node to pull columns from. + `input_name`: The name of the input to the parent node that the project + node is connected to. This is used to add input names to the + expressions pulled from the project node when dealing with joins. + + Returns: + A mapping of expressions that can be used to substitute the columns in + the parent node's output columns or conditions. This mapping will + ensure columns are only pulled up if they do not contain window + functions, and they are not simultaneously used in the parent's output + while also being used in the condition or orderings. """ - if not isinstance(node.inputs[input_index], Project): - return - - project = node.inputs[input_index] - assert isinstance(project, Project) - - input_name: str | None = node.default_input_aliases[input_index] + # Ensure every column in the project's inputs is also present in the output + # columns of the project. This will ensure that any function calls that are + # pulled into the parent can have their inputs substituted with references + # to columns from the project. 
+ transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( + widen_columns(project) + ) + # Identify which columns from the project node are used in the condition + # or orderings, versus those used in the output columns of the parent. finder: ColumnReferenceFinder = ColumnReferenceFinder() + + # First, the columns used in the output columns of the parent. finder.reset() - node.condition.accept(finder) - condition_cols: set[ColumnReference] = finder.get_column_references() - condition_names: set[str] = {col.name for col in condition_cols} - finder.reset() - for expr in node.columns.values(): + for expr in columns.values(): expr.accept(finder) output_cols: set[ColumnReference] = finder.get_column_references() output_names: set[str] = {col.name for col in output_cols} - transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( - widen_columns(project) - ) + # Next the columns used in the condition or orderings + finder.reset() + for cond in conditions: + cond.accept(finder) + for order_expr in ordering: + order_expr.expr.accept(finder) + used_cols: set[ColumnReference] = finder.get_column_references() + used_names: set[str] = {col.name for col in used_cols} + # Iterate through the columns of the project to see which ones can be + # pulled up into the parent's output columns vs condition/orderings, + # adding them to a substitutions mapping that will be used to apply the + # transformations. 
substitutions: dict[RelationalExpression, RelationalExpression] = {} for name, expr in project.columns.items(): new_expr: RelationalExpression = add_input_name( apply_substitution(expr, transfer_substitutions, {}), input_name ) if (not contains_window(new_expr)) and ( - (name in condition_names) != (name in output_names) + (name in used_names) != (name in output_names) ): ref_expr: ColumnReference = ColumnReference( name, expr.data_type, input_name=input_name ) substitutions[ref_expr] = new_expr + return substitutions + +def pull_project_into_join(node: Join, input_index: int) -> None: + """ + Attempts to pull columns from a Project node that is an input to a Join + into the output columns of the Join node, and into its join condition. + This transformation is done in-place. + + Args: + `node`: The Join node to pull the Project columns into. + `input_index`: The index of the input to the Join node that should have + its columns pulled up, if it is a project node. + """ + + # Skip if the input at the specified input is not a Project node. + if not isinstance(node.inputs[input_index], Project): + return + project = node.inputs[input_index] + assert isinstance(project, Project) + + # Invoke the common helper for Join/Filter/Limit to identify which columns + # from the project can be pulled up into the join's output columns or + # condition, and modifies the project node in-place to ensure every + # column in the project's inputs is available to the current node. + substitutions: dict[RelationalExpression, RelationalExpression] = ( + pull_project_helper( + node.columns, + [node.condition], + [], + project, + node.default_input_aliases[input_index], + ) + ) + + # Apply the substitutions to the join's condition and output columns. 
node._condition = apply_substitution(node.condition, substitutions, {}) node._columns = { name: apply_substitution(expr, substitutions, {}) @@ -148,37 +276,27 @@ def pull_project_into_join(node: Join, input_index: int) -> None: def pull_project_into_filter(node: Filter) -> None: """ - TODO + Attempts to pull columns from a Project node that is an input to a Filter + into the output columns of the Filter node, and into the filter condition. + This transformation is done in-place. + + Args: + `node`: The Filter node to pull the Project columns into. """ + + # Skip if the filter's input is not a Project node. if not isinstance(node.input, Project): return - project: Project = node.input - - finder: ColumnReferenceFinder = ColumnReferenceFinder() - finder.reset() - node.condition.accept(finder) - condition_cols: set[ColumnReference] = finder.get_column_references() - condition_names: set[str] = {col.name for col in condition_cols} - finder.reset() - for expr in node.columns.values(): - expr.accept(finder) - output_cols: set[ColumnReference] = finder.get_column_references() - output_names: set[str] = {col.name for col in output_cols} - - transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( - widen_columns(project) + # Invoke the common helper for Join/Filter/Limit to identify which columns + # from the project can be pulled up into the filter's output columns or + # condition, and modifies the project node in-place to ensure every + # column in the project's inputs is available to the current node. 
+ substitutions: dict[RelationalExpression, RelationalExpression] = ( + pull_project_helper(node.columns, [node.condition], [], node.input, None) ) - substitutions: dict[RelationalExpression, RelationalExpression] = {} - for name, expr in project.columns.items(): - new_expr: RelationalExpression = apply_substitution( - expr, transfer_substitutions, {} - ) - if (not contains_window(new_expr)) and ( - (name in condition_names) != (name in output_names) - ): - ref_expr: ColumnReference = ColumnReference(name, expr.data_type) - substitutions[ref_expr] = new_expr + + # Apply the substitutions to the filter's condition and output columns. node._condition = apply_substitution(node.condition, substitutions, {}) node._columns = { name: apply_substitution(expr, substitutions, {}) @@ -188,39 +306,27 @@ def pull_project_into_filter(node: Filter) -> None: def pull_project_into_limit(node: Limit) -> None: """ - TODO + Attempts to pull columns from a Project node that is an input to a Limit + into the output columns of the Limit node, and into the ordering + expressions. This transformation is done in-place. + + Args: + `node`: The Limit node to pull the Project columns into. """ + + # Skip if the limit's input is not a Project node. 
if not isinstance(node.input, Project): return - project: Project = node.input - - finder: ColumnReferenceFinder = ColumnReferenceFinder() - finder.reset() - for expr in node.columns.values(): - expr.accept(finder) - output_cols: set[ColumnReference] = finder.get_column_references() - output_names: set[str] = {col.name for col in output_cols} - - finder.reset() - for order_expr in node.orderings: - order_expr.expr.accept(finder) - order_cols: set[ColumnReference] = finder.get_column_references() - order_names: set[str] = {col.name for col in order_cols} - - transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( - widen_columns(project) + # Invoke the common helper for Join/Filter/Limit to identify which columns + # from the project can be pulled up into the limit's output columns or + # orderings, and modifies the project node in-place to ensure every + # column in the project's inputs is available to the current node. + substitutions: dict[RelationalExpression, RelationalExpression] = ( + pull_project_helper(node.columns, [], node.orderings, node.input, None) ) - substitutions: dict[RelationalExpression, RelationalExpression] = {} - for name, expr in project.columns.items(): - new_expr: RelationalExpression = apply_substitution( - expr, transfer_substitutions, {} - ) - if (not contains_window(new_expr)) and ( - (name in output_names) != (name in order_names) - ): - ref_expr: ColumnReference = ColumnReference(name, expr.data_type) - substitutions[ref_expr] = new_expr + + # Apply the substitutions to the limit's orderings and output columns. 
node._columns = { name: apply_substitution(expr, substitutions, {}) for name, expr in node.columns.items() From 5e9f09daed856973eb06478ca42d65338a74011b Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 22:30:27 -0400 Subject: [PATCH 025/143] Added remaining comments --- pydough/conversion/projection_pullup.py | 315 +++++++++++++++--------- 1 file changed, 202 insertions(+), 113 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 80e267b84..356acca41 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -53,7 +53,7 @@ def widen_columns( A mapping that can be used for substitution if expressions from the node are pulled up into the parent of the node. """ - # The substitution mapping that will be built by the functiona nd returned + # The substitution mapping that will be built by the functions and returned # to the calling site. substitutions: dict[RelationalExpression, RelationalExpression] = {} @@ -153,28 +153,24 @@ def pull_non_columns(node: Join | Filter | Limit) -> RelationalNode: def pull_project_helper( - columns: dict[str, RelationalExpression], - conditions: list[RelationalExpression], - ordering: list[ExpressionSortInfo], + output_columns: dict[str, RelationalExpression], + used_columns: set[RelationalExpression], project: Project, input_name: str | None, ) -> dict[RelationalExpression, RelationalExpression]: """ Main helper utility for pulling up columns from a Project node into a - a parent Filter/Join/Limit node. This function modifies the input project - in-place to ensure every column in the project's inputs is available + a parent Filter/Join/Limit/Aggregate node. This function modifies the input + project in-place to ensure every column in the project's inputs is available to the parent node, and returns a mapping of expressions that can be used to substitute the columns in the parent node's output columns or conditions. 
Args: - `columns`: The columns of the parent node that the expressions from the - project node can be pulled into. - `conditions`: The condition of the parent node that the expressions - from the project node can be pulled into. This is a list so that - nodes without a condition can pass it in as empty. - `ordering`: The orderings of the parent node that the expressions from - the project node can be pulled into. This is a list so that nodes - without orderings can pass it in as empty. + `output_columns`: The columns of the parent node that the expressions + from the project node can be pulled into. + `used_columns`: The set of expressions indicating invocations of the + columns from the project in the parent node, e.g. as a filter + or join condition, limit ordering, or aggregation key. `project`: The Project node to pull columns from. `input_name`: The name of the input to the parent node that the project node is connected to. This is used to add input names to the @@ -201,17 +197,15 @@ def pull_project_helper( # First, the columns used in the output columns of the parent. finder.reset() - for expr in columns.values(): + for expr in output_columns.values(): expr.accept(finder) output_cols: set[ColumnReference] = finder.get_column_references() output_names: set[str] = {col.name for col in output_cols} - # Next the columns used in the condition or orderings + # Next the columns that are utilized by the node. 
finder.reset() - for cond in conditions: - cond.accept(finder) - for order_expr in ordering: - order_expr.expr.accept(finder) + for expr in used_columns: + expr.accept(finder) used_cols: set[ColumnReference] = finder.get_column_references() used_names: set[str] = {col.name for col in used_cols} @@ -252,15 +246,14 @@ def pull_project_into_join(node: Join, input_index: int) -> None: project = node.inputs[input_index] assert isinstance(project, Project) - # Invoke the common helper for Join/Filter/Limit to identify which columns - # from the project can be pulled up into the join's output columns or - # condition, and modifies the project node in-place to ensure every - # column in the project's inputs is available to the current node. + # Invoke the common helper for Join/Filter/Limit/Aggregate to identify + # which columns from the project can be pulled up into the join's output + # columns or condition, and modifies the project node in-place to ensure + # every column in the project's inputs is available to the current node. substitutions: dict[RelationalExpression, RelationalExpression] = ( pull_project_helper( node.columns, - [node.condition], - [], + {node.condition}, project, node.default_input_aliases[input_index], ) @@ -288,12 +281,12 @@ def pull_project_into_filter(node: Filter) -> None: if not isinstance(node.input, Project): return - # Invoke the common helper for Join/Filter/Limit to identify which columns - # from the project can be pulled up into the filter's output columns or - # condition, and modifies the project node in-place to ensure every - # column in the project's inputs is available to the current node. + # Invoke the common helper for Join/Filter/Limit/Aggregate to identify + # which columns from the project can be pulled up into the filter's output + # columns or condition, and modifies the project node in-place to ensure + # every column in the project's inputs is available to the current node. 
substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper(node.columns, [node.condition], [], node.input, None) + pull_project_helper(node.columns, {node.condition}, node.input, None) ) # Apply the substitutions to the filter's condition and output columns. @@ -318,12 +311,17 @@ def pull_project_into_limit(node: Limit) -> None: if not isinstance(node.input, Project): return - # Invoke the common helper for Join/Filter/Limit to identify which columns - # from the project can be pulled up into the limit's output columns or - # orderings, and modifies the project node in-place to ensure every - # column in the project's inputs is available to the current node. + # Invoke the common helper for Join/Filter/Limit/Aggregate to identify + # which columns from the project can be pulled up into the limit's output + # columns or orderings, and modifies the project node in-place to ensure + # every column in the project's inputs is available to the current node. substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper(node.columns, [], node.orderings, node.input, None) + pull_project_helper( + node.columns, + {order_expr.expr for order_expr in node.orderings}, + node.input, + None, + ) ) # Apply the substitutions to the limit's orderings and output columns. 
@@ -341,76 +339,46 @@ def pull_project_into_limit(node: Limit) -> None: ] -def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: - """ - TODO - """ - if not isinstance(node.input, Project): - return node - - project: Project = node.input - - finder: ColumnReferenceFinder = ColumnReferenceFinder() - finder.reset() - for key_expr in node.aggregations.values(): - key_expr.accept(finder) - agg_cols: set[ColumnReference] = finder.get_column_references() - agg_names: set[str] = {col.name for col in agg_cols} - finder.reset() - for agg_expr in node.keys.values(): - agg_expr.accept(finder) - key_cols: set[ColumnReference] = finder.get_column_references() - key_names: set[str] = {col.name for col in key_cols} - - transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( - widen_columns(project) - ) - substitutions: dict[RelationalExpression, RelationalExpression] = {} - new_expr: RelationalExpression - for name, expr in project.columns.items(): - new_expr = apply_substitution(expr, transfer_substitutions, {}) - if (not contains_window(new_expr)) and ( - (name in agg_names) != (name in key_names) - ): - ref_expr: ColumnReference = ColumnReference(name, expr.data_type) - substitutions[ref_expr] = new_expr - new_columns: dict[str, RelationalExpression] = { - name: ColumnReference(name, expr.data_type) for name, expr in node.keys.items() - } - new_keys: dict[str, RelationalExpression] = { - name: apply_substitution(expr, substitutions, {}) - for name, expr in node.keys.items() - } - new_aggs: dict[str, CallExpression] = {} - out_expr: RelationalExpression - new_agg_expr: CallExpression | None - for name, expr in node.aggregations.items(): - new_expr = apply_substitution(expr, substitutions, {}) - assert isinstance(new_expr, CallExpression) - out_expr, new_agg_expr = simplify_agg(new_keys, new_expr, name) - new_columns[name] = out_expr - if new_agg_expr is not None: - new_aggs[name] = new_agg_expr - agg: Aggregate = Aggregate( - input=node.input, 
- keys=new_keys, - aggregations=new_aggs, - ) - return merge_adjacent_projects(Project(input=agg, columns=new_columns)) - - def simplify_agg( keys: dict[str, RelationalExpression], agg: CallExpression, name: str ) -> tuple[RelationalExpression, CallExpression | None]: """ - TODO + Simplifies an aggregation call by checking if the combination of the + function versus its inputs can be rewritten in another form. The rewrite + allows expressions to be done after aggregation since there will be a + parent projection on top of the aggregate. + + Args: + `keys`: The keys of the aggregation, used for simplifications when an + aggregation function is called on a key. + `agg`: The aggregation call to simplify. + `name`: The name of the aggregation, used to build a reference in the + parent project node to the output of the aggregation. + + Returns: + A tuple containing two terms: + - The first term is the output expression that should be used in the + parent project node to refer to the final result of the aggregation + after any post-processing is done. This may contain a reference to + column `name` of the aggregation. + - The second term is the aggregation call that should be referred to + by the parent project when deriving the final answer. If this is + `None`, then the output expression can be derived entirely in the + project and does not require an aggregation call. """ + arg: RelationalExpression + + # Build a mapping from every key expression to its name. 
reverse_keys: dict[RelationalExpression, str] = { expr: name for name, expr in keys.items() } - out_ref: RelationalExpression = ColumnReference(name, agg.data_type) - arg: RelationalExpression + # Commonly used terms: + # - Reference to the output of the aggregation + # - Literal 0 + # - Literal 1 + # - COUNT(*) call + out_ref: RelationalExpression = ColumnReference(name, agg.data_type) zero_expr: RelationalExpression = LiteralExpression(0, agg.data_type) one_expr: RelationalExpression = LiteralExpression(1, agg.data_type) count_star: CallExpression = CallExpression( @@ -466,18 +434,16 @@ def simplify_agg( agg.op == pydop.SUM and len(agg.inputs) == 1 and isinstance(agg.inputs[0], CallExpression) + and agg.inputs[0].op == pydop.DEFAULT_TO + and isinstance(agg.inputs[0].inputs[1], LiteralExpression) + and isinstance(agg.inputs[0].inputs[1].data_type, NumericType) + and agg.inputs[0].inputs[1].value == 0 ): - if ( - agg.inputs[0].op == pydop.DEFAULT_TO - and isinstance(agg.inputs[0].inputs[1], LiteralExpression) - and isinstance(agg.inputs[0].inputs[1].data_type, NumericType) - and agg.inputs[0].inputs[1].value == 0 - ): - return CallExpression( - pydop.DEFAULT_TO, agg.data_type, [out_ref, zero_expr] - ), CallExpression(pydop.SUM, agg.data_type, [agg.inputs[0].inputs[0]]) + return CallExpression( + pydop.DEFAULT_TO, agg.data_type, [out_ref, zero_expr] + ), CallExpression(pydop.SUM, agg.data_type, [agg.inputs[0].inputs[0]]) - # If the aggregation is on a key, we can just use the key. + # For many aggregations, if the argument is a key, we can just use the key. if ( agg.op in ( @@ -495,6 +461,7 @@ def simplify_agg( ): arg = agg.inputs[0] if arg in reverse_keys: + # Reference to the key from the perspective of the project. 
key_ref: RelationalExpression = ColumnReference( reverse_keys[arg], agg.data_type ) @@ -547,29 +514,117 @@ def simplify_agg( return out_ref, agg +def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: + """ + Attempts to pull columns from a Project node that is an input to an + Aggregate into the inputs of the aggregation calls of the Aggregate, and + into the grouping keys. Additionally, simplifies the aggregation calls when + possible. This transformation is done in-place. + + Args: + `node`: The Filter node to pull the Project columns into. + """ + if not isinstance(node.input, Project): + return node + + # Invoke the common helper for Join/Filter/Limit/Aggregate to identify + # which columns from the project can be pulled up into the aggregation's + # keys or used as inputs to its aggregation calls, and modifies the project + # node in-place to ensure every column in the project's inputs is available + # to the current node. + substitutions: dict[RelationalExpression, RelationalExpression] = ( + pull_project_helper( + dict(node.aggregations.items()), set(node.keys.values()), node.input, None + ) + ) + + # Build up the columns of a new project that points to all of the output + # columns of the aggregate. Start with just the keys, since the aggs will + # be added later. + new_columns: dict[str, RelationalExpression] = { + name: ColumnReference(name, expr.data_type) for name, expr in node.keys.items() + } + + # Apply the substitutions to the keys and aggregations of the aggregate. + new_keys: dict[str, RelationalExpression] = { + name: apply_substitution(expr, substitutions, {}) + for name, expr in node.keys.items() + } + + # Apply the substitutions to the aggregation calls of the aggregate, + # then try to simplify them, before updating the `new_columns`. 
+ new_aggs: dict[str, CallExpression] = {} + out_expr: RelationalExpression + new_agg_expr: CallExpression | None + for name, expr in node.aggregations.items(): + new_expr = apply_substitution(expr, substitutions, {}) + assert isinstance(new_expr, CallExpression) + # Simplify agg returns the value used in the project to store the + # answer, and the aggregation value used to derive it (if needed). If + # the aggregation value is None, then it means the aggregation was + # simplified in a way that could be derived entirely in the project. + # Otherwise, the aggregation value is referenced in the project via + # a reference to `name`. + out_expr, new_agg_expr = simplify_agg(new_keys, new_expr, name) + new_columns[name] = out_expr + if new_agg_expr is not None: + new_aggs[name] = new_agg_expr + + # Build the new aggregation with the new keys/aggs, then wrap the new + # project around it. The new project is required in case `simplify_agg` + # returned any `output_expr` values that post-process the aggregation + # results, e.g. replacing `SUM(3)` with `3 * COUNT(*)`, or `MIN(key)` with + # `key`. + agg: Aggregate = Aggregate( + input=node.input, + keys=new_keys, + aggregations=new_aggs, + ) + return merge_adjacent_projects(Project(input=agg, columns=new_columns)) + + def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: """ - TODO + Attempts to merge two adjacent Aggregate nodes into a single Aggregate + node. + + Args: + `node`: The Aggregate node to merge with its input. + + Returns: + Either the original node if the merge is not applicable, or a new + Aggregate node that uses the keys of the top aggregate node, but + modifies the aggregations to not require the original input round of + aggregation. """ + + # Skip if the input to the node is not an Aggregate. 
if not isinstance(node.input, Aggregate): return node input_agg: Aggregate = node.input + # Identify all of the keys in the top vs bottom aggregations, transposing + # the top keys so they can be expressed in the same terms as the bottom + # keys. top_keys: set[RelationalExpression] = { transpose_expression(expr, input_agg.columns) for expr in node.keys.values() } bottom_keys: set[RelationalExpression] = set(input_agg.keys.values()) + # If there are any top keys that are not present in the bottom keys, + # then the merge fails. if len(top_keys - bottom_keys) > 0: return node + # Identify any bottom keys that are not present in the top keys. This is + # needed for situations with COUNT(*) in the top aggregation. bottom_only_keys: set[RelationalExpression] = bottom_keys - top_keys - new_keys: dict[str, RelationalExpression] = { - name: transpose_expression(expr, input_agg.columns) - for name, expr in node.keys.items() - } + # Iterate across all of the aggregations in the top Aggregate node and + # transform each of them, building the result in `new_aggs`. If any of them + # cannot be transformed, then the merge fails and we return the original + # node. new_aggs: dict[str, CallExpression] = {} input_expr: RelationalExpression for agg_name, agg_expr in node.aggregations.items(): @@ -636,6 +691,10 @@ def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: # If none of the aggregations caused a merge failure, we can return a new # Aggregate node using the top keys and the merged aggregation calls. + new_keys: dict[str, RelationalExpression] = { + name: transpose_expression(expr, input_agg.columns) + for name, expr in node.keys.items() + } return Aggregate( input=input_agg.input, keys=new_keys, @@ -645,26 +704,56 @@ def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: def pullup_projections(node: RelationalNode) -> RelationalNode: """ - TODO + The main recursive procedure done to perform projection pull-up. 
+ + Args: + `node`: The relational node to pull projections up from. + + Returns: + The transformed node with projections pulled up on it and all of its + descendants. """ # Recursively invoke the procedure on all inputs to the node. node = node.copy(inputs=[pullup_projections(input) for input in node.inputs]) + + # Transform the current node versus its inputs depending on the type of + # node it is. match node: + # For Root/Project, attempt to squish with the child node, if possible. case RelationalRoot() | Project(): return merge_adjacent_projects(node) + + # For Join nodes, pull projections from the left input (also the right + # for INNER joins), then eject the non-column expressions + # into a parent projection. case Join(): pull_project_into_join(node, 0) if node.join_type == JoinType.INNER: pull_project_into_join(node, 1) return pull_non_columns(node) + + # For Filter nodes, pull projections into the filter's condition and + # output columns, then eject the non-column expressions into a parent + # projection. case Filter(): pull_project_into_filter(node) return pull_non_columns(node) + + # For Limit nodes, pull projections into the limit's orderings and + # output columns, then eject the non-column expressions into a parent + # projection. case Limit(): pull_project_into_limit(node) return pull_non_columns(node) + + # For Aggregate nodes, pull projections into the aggregation keys and + # aggregations (also simplifying aggregate calls when possible), then + # merge adjacent aggregations if possible. case Aggregate(): node = merge_adjacent_aggregations(node) return pull_project_into_aggregate(node) + + # For all other nodes, just returned the node as-is since its inputs + # have already been transformed. 
case _: return node From c45b4f224ccfb6cd06015043613599bc4f522194 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 22:31:23 -0400 Subject: [PATCH 026/143] [RUN CI] --- pydough/conversion/relational_converter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 09ed9802b..f66debc38 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1454,10 +1454,10 @@ def optimize_relational_tree( # Step 10: re-run projection merging, without pushing into joins. root = confirm_root(merge_projects(root, push_into_joins=False)) - # Step 8: re-run column bubbling + # Step 11: re-run column bubbling root = bubble_column_names(root) - # Step 11: re-run column pruning. + # Step 12: re-run column pruning. root = ColumnPruner().prune_unused_columns(root) return root From 416fbad4d3a8bd85b11573a190ce6bc65e277c10 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 14:24:23 -0400 Subject: [PATCH 027/143] Added PageRank tests and fixed bugs found along the way --- pydough/conversion/hybrid_decorrelater.py | 4 +- pydough/conversion/relational_converter.py | 6 +- pydough/qdag/collections/partition_child.py | 5 +- pydough/unqualified/qualification.py | 5 +- tests/conftest.py | 117 ++++++++ tests/gen_data/init_pagerank.sql | 42 +++ tests/test_exploration.py | 6 +- tests/test_metadata/pagerank_graphs.json | 64 ++++ tests/test_pipeline_defog.py | 2 +- tests/test_pipeline_defog_custom.py | 2 +- tests/test_pipeline_pagerank.py | 256 ++++++++++++++++ tests/test_pipeline_tpch_custom.py | 2 +- tests/test_pipeline_tpch_udf.py | 2 +- tests/test_plan_refsols/common_prefix_a.txt | 6 +- tests/test_plan_refsols/common_prefix_ak.txt | 52 ++-- tests/test_plan_refsols/common_prefix_b.txt | 11 +- tests/test_plan_refsols/common_prefix_c.txt | 23 +- tests/test_plan_refsols/common_prefix_d.txt | 7 +- 
tests/test_plan_refsols/common_prefix_f.txt | 13 +- tests/test_plan_refsols/common_prefix_g.txt | 7 +- tests/test_plan_refsols/common_prefix_h.txt | 23 +- tests/test_plan_refsols/correl_15.txt | 4 +- tests/test_plan_refsols/correl_17.txt | 4 +- tests/test_plan_refsols/correl_21.txt | 4 +- tests/test_plan_refsols/correl_23.txt | 4 +- tests/test_plan_refsols/correl_33.txt | 4 +- tests/test_plan_refsols/pagerank_a0.txt | 7 + tests/test_plan_refsols/pagerank_a1.txt | 16 + tests/test_plan_refsols/pagerank_a2.txt | 28 ++ tests/test_plan_refsols/pagerank_a6.txt | 76 +++++ tests/test_plan_refsols/pagerank_b0.txt | 7 + tests/test_plan_refsols/pagerank_b1.txt | 16 + tests/test_plan_refsols/pagerank_b3.txt | 40 +++ tests/test_plan_refsols/pagerank_c4.txt | 52 ++++ tests/test_plan_refsols/pagerank_d1.txt | 16 + tests/test_plan_refsols/pagerank_d5.txt | 64 ++++ tests/test_plan_refsols/triple_partition.txt | 33 ++- .../simple_pydough_functions.py | 27 ++ tests/test_pydough_to_sql.py | 4 +- tests/test_qualification.py | 4 +- tests/test_sql_refsols/pagerank_a0_sqlite.sql | 16 + tests/test_sql_refsols/pagerank_a1_sqlite.sql | 58 ++++ tests/test_sql_refsols/pagerank_a2_sqlite.sql | 99 +++++++ tests/test_sql_refsols/pagerank_a6_sqlite.sql | 279 ++++++++++++++++++ tests/test_sql_refsols/pagerank_b0_sqlite.sql | 16 + tests/test_sql_refsols/pagerank_b1_sqlite.sql | 58 ++++ tests/test_sql_refsols/pagerank_b3_sqlite.sql | 144 +++++++++ tests/test_sql_refsols/pagerank_c4_sqlite.sql | 189 ++++++++++++ tests/test_sql_refsols/pagerank_d1_sqlite.sql | 58 ++++ tests/test_sql_refsols/pagerank_d5_sqlite.sql | 234 +++++++++++++++ tests/test_unqualified_node.py | 8 +- tests/testing_utilities.py | 38 ++- 52 files changed, 2133 insertions(+), 129 deletions(-) create mode 100644 tests/gen_data/init_pagerank.sql create mode 100644 tests/test_metadata/pagerank_graphs.json create mode 100644 tests/test_pipeline_pagerank.py create mode 100644 tests/test_plan_refsols/pagerank_a0.txt create mode 100644 
tests/test_plan_refsols/pagerank_a1.txt create mode 100644 tests/test_plan_refsols/pagerank_a2.txt create mode 100644 tests/test_plan_refsols/pagerank_a6.txt create mode 100644 tests/test_plan_refsols/pagerank_b0.txt create mode 100644 tests/test_plan_refsols/pagerank_b1.txt create mode 100644 tests/test_plan_refsols/pagerank_b3.txt create mode 100644 tests/test_plan_refsols/pagerank_c4.txt create mode 100644 tests/test_plan_refsols/pagerank_d1.txt create mode 100644 tests/test_plan_refsols/pagerank_d5.txt create mode 100644 tests/test_sql_refsols/pagerank_a0_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_a1_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_a2_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_a6_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_b0_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_b1_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_b3_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_c4_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_d1_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_d5_sqlite.sql diff --git a/pydough/conversion/hybrid_decorrelater.py b/pydough/conversion/hybrid_decorrelater.py index 120bd1b9c..5412735d1 100644 --- a/pydough/conversion/hybrid_decorrelater.py +++ b/pydough/conversion/hybrid_decorrelater.py @@ -294,8 +294,8 @@ def correl_ref_purge( new_parent_uni_keys, ) if isinstance(operation, HybridCalculate): - for str, expr in operation.new_expressions.items(): - operation.new_expressions[str] = operation.terms[name] + for name, expr in operation.new_expressions.items(): + operation.new_expressions[name] = operation.terms[name] if isinstance(operation, HybridFilter): operation.condition = self.remove_correl_refs( operation.condition, diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 956bb3698..1e51e9df6 100644 --- 
a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1023,7 +1023,7 @@ def translate_calculate( # it relative to the input context. for name in node.new_expressions: name = node.renamings.get(name, name) - hybrid_expr: HybridExpr = node.terms[name] + hybrid_expr: HybridExpr = node.new_expressions[name] ref_expr: HybridRefExpr = HybridRefExpr(name, hybrid_expr.typ) rel_expr: RelationalExpression = self.translate_expression( hybrid_expr, context @@ -1416,6 +1416,10 @@ def optimize_relational_tree( Returns: The optimized relational root. """ + + # Step 0: prune unused columns. + root = ColumnPruner().prune_unused_columns(root) + # Step 1: push filters down as far as possible root._input = push_filters(root.input, set()) diff --git a/pydough/qdag/collections/partition_child.py b/pydough/qdag/collections/partition_child.py index e0609e658..da181ec44 100644 --- a/pydough/qdag/collections/partition_child.py +++ b/pydough/qdag/collections/partition_child.py @@ -97,7 +97,10 @@ def get_term(self, term_name: str): if term_name in self.inherited_downstreamed_terms: context: PyDoughCollectionQDAG = self.child_access while term_name not in context.all_terms: - if context is self.child_access: + if ( + context is self.child_access + and term_name in self.ancestor_context.inherited_downstreamed_terms + ): context = self.ancestor_context else: assert context.ancestor_context is not None diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index 3430ab7e0..a6dddf1f6 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -881,7 +881,7 @@ def split_partition_ancestry( Returns: A tuple where the first element is the ancestor of all the data being partitioned, the second is the data being partitioned which - now points to an root instead of hte original ancestor, and the + now points to an root instead of the original ancestor, and the third is a list of the ancestor 
names. """ @@ -903,6 +903,7 @@ def split_partition_ancestry( | UnqualifiedOrderBy() | UnqualifiedSingular() | UnqualifiedPartition() + | UnqualifiedBest() ): parent: UnqualifiedNode = node._parcel[0] new_ancestry, new_child, ancestry_names = self.split_partition_ancestry( @@ -963,6 +964,8 @@ def split_partition_ancestry( build_node[0] = UnqualifiedOrderBy(build_node[0], *node._parcel[1:]) case UnqualifiedSingular(): build_node[0] = UnqualifiedSingular(build_node[0], *node._parcel[1:]) + case UnqualifiedBest(): + build_node[0] = UnqualifiedBest(build_node[0], *node._parcel[1:]) case _: # Any other unqualified node would mean something is malformed. raise PyDoughUnqualifiedException( diff --git a/tests/conftest.py b/tests/conftest.py index 32695a9a0..44de583c5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -425,3 +425,120 @@ def sqlite_technograph_connection() -> DatabaseContext: # Return the database context. return DatabaseContext(DatabaseConnection(connection), DatabaseDialect.SQLITE) + + +@pytest.fixture(scope="session") +def get_pagerank_graph() -> graph_fetcher: + """ + A function that returns the graph used for PageRank calculations. + """ + + @cache + def impl(name: str) -> GraphMetadata: + return pydough.parse_json_metadata_from_file( + file_path=f"{os.path.dirname(__file__)}/test_metadata/pagerank_graphs.json", + graph_name="PAGERANK", + ) + + return impl + + +@pytest.fixture(scope="session") +def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: + """ + Returns the SQLITE database contexts for the various pagerank databases. + """ + # Setup the directory to be the main PyDough directory. 
+ base_dir: str = os.path.dirname(os.path.dirname(__file__)) + + # Outputs verified via https://pagerank-visualizer.netlify.app/ + pagerank_configs = [ + ("PAGERANK_A", 4, [(1, 2), (2, 1), (2, 3), (3, 4), (4, 1), (4, 2)]), + ("PAGERANK_B", 5, [(1, 2), (2, 1), (2, 5), (3, 2), (4, 2), (4, 5), (5, 3)]), + ( + "PAGERANK_C", + 8, + [ + (1, 2), + (1, 6), + (2, 1), + (2, 5), + (2, 6), + (3, 2), + (4, 2), + (4, 5), + (5, 3), + (7, 8), + (8, 7), + ], + ), + ( + "PAGERANK_D", + 16, + [ + (1, 2), + (1, 3), + (1, 4), + (1, 5), + (2, 1), + (2, 5), + (3, 2), + (4, 2), + (4, 5), + (4, 11), + (5, 3), + (5, 11), + (5, 14), + (5, 16), + (6, 7), + (7, 8), + (8, 6), + (8, 7), + (9, 2), + (9, 10), + (11, 12), + (12, 13), + (12, 14), + (13, 4), + (13, 5), + (15, 2), + ], + ), + ] + + # Setup the pagerank databases. + result: dict[str, DatabaseContext] = {} + for name, nodes, vertices in pagerank_configs: + subprocess.run( + f"cd tests; rm -fv gen_data/{name.lower()}.db; sqlite3 gen_data/{name.lower()}.db < gen_data/init_pagerank.sql", + shell=True, + ) + path: str = os.path.join(base_dir, f"tests/gen_data/{name.lower()}.db") + connection: sqlite3.Connection = sqlite3.connect(path) + cursor: sqlite3.Cursor = connection.cursor() + for site in range(nodes): + cursor.execute( + "INSERT INTO SITES VALUES (?, ?)", + (site + 1, f"SITE {chr(ord('A') + site)}"), + ) + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site + 1, site + 1), + ) + no_links: set[int] = set(range(1, nodes + 1)) + for src, dst in vertices: + no_links.discard(src) + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (src, dst), + ) + for site in no_links: + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site, None), + ) + cursor.connection.commit() + result[name] = DatabaseContext( + DatabaseConnection(connection), DatabaseDialect.SQLITE + ) + return result diff --git a/tests/gen_data/init_pagerank.sql b/tests/gen_data/init_pagerank.sql new file mode 100644 index 000000000..cb6758196 --- /dev/null +++ 
b/tests/gen_data/init_pagerank.sql @@ -0,0 +1,42 @@ +-- TODO + +CREATE TABLE SITES ( + s_key INTEGER NOT NULL, + s_name TEXT NOT NULL +); + +CREATE TABLE LINKS ( + l_source INTEGER NOT NULL, + l_target INTEGER +); + +-- INSERT INTO SITES (s_key, s_name) VALUES +-- (1, 'Site A'), +-- (2, 'Site B'), +-- (3, 'Site C'), +-- (4, 'Site D'), +-- (5, 'Site E') +-- ; + +-- INSERT INTO LINKS (l_source, l_target) VALUES +-- (1, 2), (1, 3), (1, 4), (1, 5), +-- (2, 1), (2, 3), +-- (3, NULL), +-- (4, 1), (4, 2), (4, 3), +-- (5, 1), (5, 4) +-- ; + +-- INSERT INTO SITES (s_key, s_name) VALUES +-- (1, 'Site A'), +-- (2, 'Site B'), +-- (3, 'Site C'), +-- (4, 'Site D') +-- ; + +-- INSERT INTO LINKS (l_source, l_target) VALUES +-- (1, 2), +-- (2, 1), (2, 3), +-- (3, 4), +-- (4, 1), (4, 2) +-- ; + diff --git a/tests/test_exploration.py b/tests/test_exploration.py index e9adc64a8..0e3780069 100644 --- a/tests/test_exploration.py +++ b/tests/test_exploration.py @@ -1289,7 +1289,7 @@ def test_graph_structure( ) def unqualified_exploration_test_data( request, -) -> tuple[str, Callable[[], UnqualifiedNode], str, str]: +) -> tuple[str, Callable[..., UnqualifiedNode], str, str]: """ Testing data used for test_unqualified_node_exploration. Returns a tuple of the graph name to use, a function that takes in a graph and returns the @@ -1298,7 +1298,7 @@ def unqualified_exploration_test_data( without verbose mode. 
""" graph_name: str = request.param[0] - test_impl: Callable[[], UnqualifiedNode] = request.param[1] + test_impl: Callable[..., UnqualifiedNode] = request.param[1] verbose_refsol: str = request.param[2] non_verbose_refsol: str = request.param[3] return graph_name, test_impl, verbose_refsol.strip(), non_verbose_refsol.strip() @@ -1313,7 +1313,7 @@ def unqualified_exploration_test_data( ) def test_unqualified_node_exploration( unqualified_exploration_test_data: tuple[ - str, Callable[[], UnqualifiedNode], str, str + str, Callable[..., UnqualifiedNode], str, str ], verbose: bool, get_sample_graph: graph_fetcher, diff --git a/tests/test_metadata/pagerank_graphs.json b/tests/test_metadata/pagerank_graphs.json new file mode 100644 index 000000000..bd4150ceb --- /dev/null +++ b/tests/test_metadata/pagerank_graphs.json @@ -0,0 +1,64 @@ +[ + { + "name": "PAGERANK", + "version": "V2", + "collections": [ + { + "name": "sites", + "type": "simple table", + "table path": "main.SITES", + "unique properties": ["key"], + "properties": [ + {"name": "key", "type": "table column", "column name": "s_key", "data type": "numeric"}, + {"name": "name", "type": "table column", "column name": "s_name", "data type": "string"} + ] + }, + { + "name": "links", + "type": "simple table", + "table path": "main.LINKS", + "unique properties": [["source_key", "target_key"]], + "properties": [ + {"name": "source_key", "type": "table column", "column name": "l_source", "data type": "numeric"}, + {"name": "target_key", "type": "table column", "column name": "l_target", "data type": "numeric"} + ] + } + ], + "relationships": [ + { + "type": "simple join", + "name": "outgoing_links", + "parent collection": "sites", + "child collection": "links", + "singular": false, + "always matches": true, + "keys": {"key": ["source_key"]} + }, + { + "type": "reverse", + "name": "source_site", + "original parent": "sites", + "original property": "outgoing_links", + "singular": true, + "always matches": true + }, + { + 
"type": "general join", + "name": "incoming_links", + "parent collection": "sites", + "child collection": "links", + "singular": false, + "always matches": true, + "condition": "ABSENT(other.target_key) | (self.key == other.target_key)" + }, + { + "type": "reverse", + "name": "target_site", + "original parent": "sites", + "original property": "incoming_links", + "singular": false, + "always matches": true + } + ] + } +] \ No newline at end of file diff --git a/tests/test_pipeline_defog.py b/tests/test_pipeline_defog.py index e21eefc11..f22ee3bca 100644 --- a/tests/test_pipeline_defog.py +++ b/tests/test_pipeline_defog.py @@ -1122,7 +1122,7 @@ def test_defog_until_sql( """ Tests the conversion of the defog analytical questions to SQL. """ - unqualified_impl: Callable[[], UnqualifiedNode] = ( + unqualified_impl: Callable[..., UnqualifiedNode] = ( defog_pipeline_test_data.pydough_function ) graph_name: str = defog_pipeline_test_data.graph_name diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index eea8fb774..f254c58fa 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -1647,7 +1647,7 @@ def test_pipeline_e2e_defog_custom( ], ) def test_defog_e2e_errors( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode], graph_name: str, error_message: str, defog_graphs: graph_fetcher, diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py new file mode 100644 index 000000000..ead18dd81 --- /dev/null +++ b/tests/test_pipeline_pagerank.py @@ -0,0 +1,256 @@ +""" +Integration tests for the PyDough workflow with the pagerank query on the +pagerank graphs. 
+""" + +from collections.abc import Callable + +import pandas as pd +import pytest + +from pydough.database_connectors import DatabaseContext, DatabaseDialect +from tests.test_pydough_functions.simple_pydough_functions import pagerank + +from .testing_utilities import PyDoughPandasTest, graph_fetcher + + +@pytest.fixture( + params=[ + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_A", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4], + "page_rank": [0.25] * 4, + } + ), + "pagerank_a0", + order_sensitive=True, + args=[0], + ), + id="pagerank_a0", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_A", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4], + "page_rank": [0.25, 0.35625, 0.14375, 0.25], + } + ), + "pagerank_a1", + order_sensitive=True, + args=[1], + ), + id="pagerank_a1", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_A", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4], + "page_rank": [0.29516, 0.35625, 0.18891, 0.15969], + } + ), + "pagerank_a2", + order_sensitive=True, + args=[2], + ), + id="pagerank_a2", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_A", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4], + "page_rank": [0.27205, 0.34791, 0.18787, 0.19218], + } + ), + "pagerank_a6", + order_sensitive=True, + args=[6], + ), + id="pagerank_a6", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_B", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4, 5], + "page_rank": [0.2] * 5, + } + ), + "pagerank_b0", + order_sensitive=True, + args=[0], + ), + id="pagerank_b0", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_B", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4, 5], + "page_rank": [0.115, 0.455, 0.2, 0.03, 0.2], + } + ), + "pagerank_b1", + order_sensitive=True, + args=[1], + ), + id="pagerank_b1", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_B", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4, 5], + "page_rank": [0.16196, 0.40262, 
0.23071, 0.03, 0.17471], + } + ), + "pagerank_b3", + order_sensitive=True, + args=[3], + ), + id="pagerank_b3", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_D", + lambda: pd.DataFrame( + { + "key": range(1, 17), + "page_rank": [ + 0.0459, + 0.18314, + 0.0459, + 0.05918, + 0.10345, + 0.0459, + 0.09902, + 0.07246, + 0.01934, + 0.0459, + 0.05033, + 0.07246, + 0.0459, + 0.05918, + 0.01934, + 0.03262, + ], + } + ), + "pagerank_d1", + order_sensitive=True, + args=[1], + ), + id="pagerank_d1", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_D", + lambda: pd.DataFrame( + { + "key": range(1, 17), + "page_rank": [ + 0.06896, + 0.11157, + 0.05385, + 0.04884, + 0.11486, + 0.05966, + 0.10651, + 0.10618, + 0.01647, + 0.02365, + 0.05369, + 0.06529, + 0.04508, + 0.06876, + 0.01647, + 0.04015, + ], + } + ), + "pagerank_d5", + order_sensitive=True, + args=[5], + ), + id="pagerank_d5", + ), + ], +) +def pagerank_pipeline_test_data(request) -> PyDoughPandasTest: + """ + Test data for e2e tests of the pagerank query on the sqlite pagerank + databases. Returns an instance of PyDoughPandasTest containing + information about the test. + """ + return request.param + + +def test_pipeline_until_relational_pagerank( + pagerank_pipeline_test_data: PyDoughPandasTest, + get_pagerank_graph: graph_fetcher, + get_plan_test_filename: Callable[[str], str], + update_tests: bool, +) -> None: + """ + Verifies the generated relational plans for the pagerank tests. 
+ """ + file_path: str = get_plan_test_filename(pagerank_pipeline_test_data.test_name) + pagerank_pipeline_test_data.run_relational_test( + get_pagerank_graph, file_path, update_tests + ) + + +def test_pipeline_until_sql_pagerank( + pagerank_pipeline_test_data: PyDoughPandasTest, + get_pagerank_graph: graph_fetcher, + get_sql_test_filename: Callable[[str, DatabaseDialect], str], + sqlite_pagerank_db_contexts: dict[str, DatabaseContext], + update_tests: bool, +) -> None: + """ + Verifies the generated SQL for the pagerank tests. + """ + ctx: DatabaseContext = sqlite_pagerank_db_contexts[ + pagerank_pipeline_test_data.graph_name + ] + file_path: str = get_sql_test_filename( + pagerank_pipeline_test_data.test_name, ctx.dialect + ) + pagerank_pipeline_test_data.run_sql_test( + get_pagerank_graph, file_path, update_tests, ctx + ) + + +@pytest.mark.execute +def test_pipeline_e2e_pagerank( + pagerank_pipeline_test_data: PyDoughPandasTest, + get_pagerank_graph: graph_fetcher, + sqlite_pagerank_db_contexts: dict[str, DatabaseContext], +): + """ + Verifies the final output answer for the pagerank tests. 
+ """ + pagerank_pipeline_test_data.run_e2e_test( + get_pagerank_graph, + sqlite_pagerank_db_contexts[pagerank_pipeline_test_data.graph_name], + ) diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 56a44feb8..e7ee39ad6 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -3312,7 +3312,7 @@ def test_pipeline_e2e_tpch_custom( ], ) def test_pipeline_e2e_errors( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode], columns: dict[str, str] | list[str] | None, error_message: str, get_sample_graph: graph_fetcher, diff --git a/tests/test_pipeline_tpch_udf.py b/tests/test_pipeline_tpch_udf.py index 42ec6c57d..6ef51125e 100644 --- a/tests/test_pipeline_tpch_udf.py +++ b/tests/test_pipeline_tpch_udf.py @@ -437,7 +437,7 @@ def test_pipeline_e2e_tpch_sqlite_udf( ], ) def test_pipeline_tpch_sqlite_udf_errors( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode], error_message: str, get_udf_graph: graph_fetcher, ): diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index e595a8632..4c60d19f0 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations_0': t0.n_nations_0, 'n_regionkey': t0.n_regionkey, 
'n_rows': t1.n_rows}) - PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t0.n_nations, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations': 1:numeric, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 905b6bb93..5a0d2c111 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_orders', n_machine_high_orders), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) - FILTER(condition=sum_sum_n_rows > 0:numeric & sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_sum_sum_sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) + FILTER(condition=sum_sum_sum_sum_n_rows > 0:numeric & sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_sum_n_rows': sum_sum_sum_sum_n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_sum_sum_n_rows': SUM(sum_sum_sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,8 +10,8 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cus SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': 
SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'sum_sum_n_rows': t1.sum_sum_n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) @@ -22,23 +22,25 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cus SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) 
- JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': 
l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows}) + AGGREGATE(keys={'c_custkey': c_custkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'o_orderkey': t0.o_orderkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) 
+ JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 76bd980c0..63b2ed66a 100644 --- 
a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -1,13 +1,12 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers), ('n_suppliers', n_suppliers)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0), 'n_suppliers': SUM(n_suppliers)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations_0': t0.n_nations_0, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations), 'n_suppliers': SUM(n_suppliers)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t0.n_nations, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) + PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, 
aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index b37c12250..e0bb5bd1f 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', n_orders), ('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', n_orders), ('n_parts', n_parts)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'n_parts': t1.n_parts, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_1, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 
'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nations': sum_agg_1, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_parts': sum_agg_22, 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_expr_18, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_22': SUM(agg_22), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_18': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -12,10 +12,9 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, 
aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18_0': t0.expr_18_0, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - PROJECT(columns={'expr_18_0': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': SUM(expr_18), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18': t0.expr_18, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + PROJECT(columns={'expr_18': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index d379695ad..2a50927f0 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -3,7 +3,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_1, 'n_orders_94': DEFAULT_TO(sum_sum_expr_7, 0:numeric), 'n_orders_95': DEFAULT_TO(sum_sum_expr_10, 0:numeric), 'n_orders_96': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': 
SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_29': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -21,6 +21,5 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': 
s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index c71a26a59..2b51e5998 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) +ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_agg_8)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_agg_8': t1.sum_agg_8}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_agg_8': SUM(agg_8)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_8': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows}) PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': 
t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 171bc4f3e..2dc58c1e0 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -2,12 +2,11 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(agg_2)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.agg_2, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.agg_2, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) PROJECT(columns={'agg_2': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 17e99a1c8..a9bfb0e30 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders), ('n_customers', sum_n_rows), ('n_parts', sum_agg_22), ('n_suppliers', sum_sum_expr_18)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'r_name': t0.r_name, 'sum_agg_22': t1.sum_agg_22, 'sum_n_rows': 
t1.sum_n_rows, 'sum_sum_expr_18': t1.sum_sum_expr_18}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_0, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_0': t0.agg_0, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nations': sum_agg_0, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'sum_agg_22': sum_agg_22, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18': sum_sum_expr_18}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_agg_22': SUM(agg_22), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_18': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_0': t0.agg_0, 'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) PROJECT(columns={'agg_0': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) @@ -12,10 +12,9 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18_0': t0.expr_18_0, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - PROJECT(columns={'expr_18_0': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': SUM(expr_18), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18': t0.expr_18, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + PROJECT(columns={'expr_18': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index d31f6b6ff..841e83d3d 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('n', n_rows)], 
orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('n', n)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index 51fe077d4..8b74cc17a 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ b/tests/test_plan_refsols/correl_17.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('fullname', fname)], orderings=[(fname):asc_first]) - PROJECT(columns={'fname': JOIN_STRINGS('-':string, LOWER(r_name), lname)}) +ROOT(columns=[('fullname', fullname)], orderings=[(fullname):asc_first]) + PROJECT(columns={'fullname': JOIN_STRINGS('-':string, LOWER(r_name), lname)}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'lname': t0.lname, 'r_name': t1.r_name}) PROJECT(columns={'lname': LOWER(n_name), 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_21.txt b/tests/test_plan_refsols/correl_21.txt index b3da2efc6..4f028747f 100644 --- a/tests/test_plan_refsols/correl_21.txt +++ b/tests/test_plan_refsols/correl_21.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('n_sizes', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_sizes': COUNT()}) FILTER(condition=n_rows > avg_n_parts, columns={}) 
JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_n_parts': t0.avg_n_parts, 'n_rows': t1.n_rows}) AGGREGATE(keys={}, aggregations={'avg_n_parts': AVG(n_parts)}) diff --git a/tests/test_plan_refsols/correl_23.txt b/tests/test_plan_refsols/correl_23.txt index d884fa852..d9450143b 100644 --- a/tests/test_plan_refsols/correl_23.txt +++ b/tests/test_plan_refsols/correl_23.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('n_sizes', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_sizes': COUNT()}) FILTER(condition=n_rows > avg_n_combo, columns={}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_n_combo': t0.avg_n_combo, 'n_rows': t1.n_rows}) AGGREGATE(keys={}, aggregations={'avg_n_combo': AVG(n_combos)}) diff --git a/tests/test_plan_refsols/correl_33.txt b/tests/test_plan_refsols/correl_33.txt index 2f9917c6a..4b3ac6075 100644 --- a/tests/test_plan_refsols/correl_33.txt +++ b/tests/test_plan_refsols/correl_33.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('n', n)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n': COUNT()}) JOIN(condition=MONTH(t1.o_orderdate) == MONTH(t0.first_order_date) & YEAR(t1.o_orderdate) == YEAR(t0.first_order_date), type=INNER, cardinality=PLURAL_UNKNOWN, columns={}) AGGREGATE(keys={}, aggregations={'first_order_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/pagerank_a0.txt b/tests/test_plan_refsols/pagerank_a0.txt new file mode 100644 index 000000000..89355a8da --- /dev/null +++ b/tests/test_plan_refsols/pagerank_a0.txt @@ -0,0 +1,7 @@ +ROOT(columns=[('key', anything_s_key), ('page_rank', page_rank_0)], orderings=[(anything_s_key):asc_first]) + PROJECT(columns={'anything_s_key': anything_s_key, 'page_rank_0': 
ROUND(1.0:numeric / anything_agg_2, 5:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_agg_2': ANYTHING(agg_2), 'anything_s_key': ANYTHING(s_key)}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_2': t0.agg_2, 's_key': t0.s_key}) + PROJECT(columns={'agg_2': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source}) diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt new file mode 100644 index 000000000..28da857bf --- /dev/null +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -0,0 +1,16 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 
'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt new file mode 100644 index 000000000..d79575279 --- /dev/null +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -0,0 +1,28 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': 
t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 
'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt new file mode 100644 index 000000000..c1eec12ef --- /dev/null +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -0,0 +1,76 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + 
PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], 
partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': 
t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], 
order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * 
RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + 
SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_b0.txt b/tests/test_plan_refsols/pagerank_b0.txt new file mode 100644 index 000000000..89355a8da --- /dev/null +++ b/tests/test_plan_refsols/pagerank_b0.txt @@ -0,0 +1,7 @@ +ROOT(columns=[('key', anything_s_key), ('page_rank', page_rank_0)], orderings=[(anything_s_key):asc_first]) + PROJECT(columns={'anything_s_key': anything_s_key, 'page_rank_0': ROUND(1.0:numeric / anything_agg_2, 5:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_agg_2': ANYTHING(agg_2), 'anything_s_key': ANYTHING(s_key)}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'agg_2': t0.agg_2, 's_key': t0.s_key}) + PROJECT(columns={'agg_2': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source}) diff --git a/tests/test_plan_refsols/pagerank_b1.txt b/tests/test_plan_refsols/pagerank_b1.txt new file mode 100644 index 000000000..28da857bf --- /dev/null +++ b/tests/test_plan_refsols/pagerank_b1.txt @@ -0,0 +1,16 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt new file mode 100644 index 000000000..e02382e76 --- /dev/null +++ b/tests/test_plan_refsols/pagerank_b3.txt @@ -0,0 +1,40 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 
'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 
'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + 
PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt new file mode 100644 index 000000000..f063615ae --- /dev/null +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -0,0 +1,52 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, 
type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | 
t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': 
t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + 
PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': 
l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d1.txt b/tests/test_plan_refsols/pagerank_d1.txt new file mode 100644 index 000000000..28da857bf --- /dev/null +++ b/tests/test_plan_refsols/pagerank_d1.txt @@ -0,0 +1,16 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + 
SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt new file mode 100644 index 000000000..a3ca6ed77 --- /dev/null +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -0,0 +1,64 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 
'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + 
PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + 
PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + 
AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/triple_partition.txt 
b/tests/test_plan_refsols/triple_partition.txt index 7193ed6f9..80c4fe8aa 100644 --- a/tests/test_plan_refsols/triple_partition.txt +++ b/tests/test_plan_refsols/triple_partition.txt @@ -2,22 +2,23 @@ ROOT(columns=[('region', supp_region), ('avgpct', avg_percentage)], orderings=[( AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(percentage)}) PROJECT(columns={'percentage': 100.0:numeric * max_n_instances / DEFAULT_TO(sum_n_instances, 0:numeric), 'supp_region': supp_region}) AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) - AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) - FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - 
JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': SUM(n_instances)}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_instances': t0.n_instances, 'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) + AGGREGATE(keys={'o_custkey': o_custkey, 'p_type': p_type, 'r_name': r_name}, aggregations={'n_instances': COUNT()}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) + 
FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 6fc5248d8..250faeee6 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -3030,3 +3030,30 @@ def quantile_function_test_4(): orders_99_percent=QUANTILE(selected_orders.total_price, 0.99), orders_max=QUANTILE(selected_orders.total_price, 1.0), ) + + +def pagerank(n_iters): + d = 
0.85 + n_out_expr = SUM( + outgoing_links.CALCULATE( + n_target=IFF(ABSENT(target_key), n, INTEGER((source_key != target_key))) + ).n_target + ) + source = sites.CALCULATE(n=RELSIZE()).CALCULATE(page_rank=1.0 / n, n_out=n_out_expr) + for i in range(n_iters): + group_name = f"s{i}" + source = ( + source.outgoing_links.CALCULATE( + consider_link=INTEGER(ABSENT(target_key) | (source_key != target_key)) + ) + .target_site.PARTITION(name=group_name, by=key) + .target_site.CALCULATE( + n, + page_rank=(1.0 - d) / n + + d * RELSUM(consider_link * page_rank / n_out, per=group_name), + ) + .BEST(per=group_name, by=key.ASC()) + ) + if i < n_iters - 1: + source = source.CALCULATE(n_out=n_out_expr) + return source.CALCULATE(key, page_rank=ROUND(page_rank, 5)).ORDER_BY(key.ASC()) diff --git a/tests/test_pydough_to_sql.py b/tests/test_pydough_to_sql.py index 2c0a13b85..da64086e4 100644 --- a/tests/test_pydough_to_sql.py +++ b/tests/test_pydough_to_sql.py @@ -195,7 +195,7 @@ ], ) def test_pydough_to_sql_tpch( - pydough_code: Callable[[], UnqualifiedNode], + pydough_code: Callable[..., UnqualifiedNode], columns: dict[str, str] | list[str] | None, test_name: str, get_sample_graph: graph_fetcher, @@ -278,7 +278,7 @@ def test_pydough_to_sql_tpch( ], ) def test_pydough_to_sql_defog( - pydough_code: Callable[[], UnqualifiedNode], + pydough_code: Callable[..., UnqualifiedNode], test_name: str, graph_name: str, defog_graphs: graph_fetcher, diff --git a/tests/test_qualification.py b/tests/test_qualification.py index aa5da57fa..17ddbc553 100644 --- a/tests/test_qualification.py +++ b/tests/test_qualification.py @@ -945,7 +945,7 @@ ], ) def test_qualify_node_to_ast_string( - impl: Callable[[], UnqualifiedNode], + impl: Callable[..., UnqualifiedNode], answer_tree_str: str, get_sample_graph: graph_fetcher, default_config: PyDoughConfigs, @@ -1047,7 +1047,7 @@ def test_qualify_node_to_ast_string( ], ) def test_qualify_node_collation( - impl: Callable[[], UnqualifiedNode], + impl: Callable[..., 
UnqualifiedNode], answer_tree_str: str, collation_default_asc: bool, propagate_collation: bool, diff --git a/tests/test_sql_refsols/pagerank_a0_sqlite.sql b/tests/test_sql_refsols/pagerank_a0_sqlite.sql new file mode 100644 index 000000000..4fe8f22be --- /dev/null +++ b/tests/test_sql_refsols/pagerank_a0_sqlite.sql @@ -0,0 +1,16 @@ +WITH _s0 AS ( + SELECT + COUNT(*) OVER () AS agg_2, + s_key + FROM main.sites +) +SELECT + MAX(_s0.s_key) AS key, + ROUND(CAST(1.0 AS REAL) / MAX(_s0.agg_2), 5) AS page_rank +FROM _s0 AS _s0 +JOIN main.links AS links + ON _s0.s_key = links.l_source +GROUP BY + _s0.s_key +ORDER BY + MAX(_s0.s_key) diff --git a/tests/test_sql_refsols/pagerank_a1_sqlite.sql b/tests/test_sql_refsols/pagerank_a1_sqlite.sql new file mode 100644 index 000000000..549289485 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_a1_sqlite.sql @@ -0,0 +1,58 @@ +WITH _t8 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t8 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t9 + ON _s2.anything_s_key = _t9.l_source + JOIN _t8 AS _s5 + ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank 
+FROM _t +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_a2_sqlite.sql b/tests/test_sql_refsols/pagerank_a2_sqlite.sql new file mode 100644 index 000000000..32a7c2048 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_a2_sqlite.sql @@ -0,0 +1,99 @@ +WITH _t14 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t14 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t9 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t15.l_source <> _t15.l_target OR _t15.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t15 + ON _s2.anything_s_key = _t15.l_source + JOIN _t14 AS _s5 + ON _s5.s_key = _t15.l_target OR _t15.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t9 +), _s8 AS ( + SELECT + COALESCE( + SUM( + IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_n) AS anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t AS _t + JOIN _s1 AS _s7 + ON _s7.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s8.page_rank + ) AS REAL) / _s8.n_out + ) 
OVER (PARTITION BY _s11.s_key) AS page_rank_0, + _s11.s_key + FROM _s8 AS _s8 + JOIN _s1 AS _t16 + ON _s8.anything_s_key = _t16.l_source + JOIN _t14 AS _s11 + ON _s11.s_key = _t16.l_target OR _t16.l_target IS NULL +), _t_2 AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t_2 +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_a6_sqlite.sql b/tests/test_sql_refsols/pagerank_a6_sqlite.sql new file mode 100644 index 000000000..9bdfb807c --- /dev/null +++ b/tests/test_sql_refsols/pagerank_a6_sqlite.sql @@ -0,0 +1,279 @@ +WITH _t38 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t38 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t33 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t39.l_source <> _t39.l_target OR _t39.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t39 + ON _s2.anything_s_key = _t39.l_source + JOIN _t38 AS _s5 + ON _s5.s_key = _t39.l_target OR _t39.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t33 +), _s8 AS ( + SELECT + COALESCE( + SUM( + IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + 
MAX(_t.anything_n) AS anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t AS _t + JOIN _s1 AS _s7 + ON _s7.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t27 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t40.l_source <> _t40.l_target OR _t40.l_target IS NULL AS INTEGER) * _s8.page_rank + ) AS REAL) / _s8.n_out + ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, + _s8.anything_anything_n, + _s11.s_key + FROM _s8 AS _s8 + JOIN _s1 AS _t40 + ON _s8.anything_s_key = _t40.l_source + JOIN _t38 AS _s11 + ON _s11.s_key = _t40.l_target OR _t40.l_target IS NULL +), _t_2 AS ( + SELECT + page_rank_0, + anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t27 +), _s14 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s13.l_target IS NULL, + _t.anything_anything_n, + CAST(_s13.l_source <> _s13.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_n) AS anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_2 AS _t + JOIN _s1 AS _s13 + ON _s13.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t21 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t41.l_source <> _t41.l_target OR _t41.l_target IS NULL AS INTEGER) * _s14.page_rank + ) AS REAL) / _s14.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, + _s14.anything_anything_anything_n, + _s17.s_key + FROM _s14 AS _s14 + JOIN _s1 AS _t41 + ON _s14.anything_s_key = _t41.l_source + JOIN _t38 AS _s17 + ON _s17.s_key = _t41.l_target OR _t41.l_target IS NULL +), _t_3 AS ( + SELECT + page_rank_0, + anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t21 +), _s20 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s19.l_target IS NULL, + _t.anything_anything_anything_n, + 
CAST(_s19.l_source <> _s19.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_3 AS _t + JOIN _s1 AS _s19 + ON _s19.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t15 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t42.l_source <> _t42.l_target OR _t42.l_target IS NULL AS INTEGER) * _s20.page_rank + ) AS REAL) / _s20.n_out + ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, + _s20.anything_anything_anything_anything_n, + _s23.s_key + FROM _s20 AS _s20 + JOIN _s1 AS _t42 + ON _s20.anything_s_key = _t42.l_source + JOIN _t38 AS _s23 + ON _s23.s_key = _t42.l_target OR _t42.l_target IS NULL +), _t_4 AS ( + SELECT + page_rank_0, + anything_anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t15 +), _s26 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s25.l_target IS NULL, + _t.anything_anything_anything_anything_n, + CAST(_s25.l_source <> _s25.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_4 AS _t + JOIN _s1 AS _s25 + ON _s25.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t9 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s26.anything_anything_anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t43.l_source <> _t43.l_target OR _t43.l_target IS NULL AS INTEGER) * _s26.page_rank + ) AS REAL) / _s26.n_out + ) OVER (PARTITION BY _s29.s_key) AS page_rank_0, + _s26.anything_anything_anything_anything_anything_n, + _s29.s_key + FROM _s26 AS _s26 + JOIN _s1 AS _t43 + ON _s26.anything_s_key = _t43.l_source + JOIN _t38 AS _s29 + ON _s29.s_key = 
_t43.l_target OR _t43.l_target IS NULL +), _t_5 AS ( + SELECT + page_rank_0, + anything_anything_anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t9 +), _s32 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s31.l_target IS NULL, + _t.anything_anything_anything_anything_anything_n, + CAST(_s31.l_source <> _s31.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_5 AS _t + JOIN _s1 AS _s31 + ON _s31.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s32.anything_anything_anything_anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t44.l_source <> _t44.l_target OR _t44.l_target IS NULL AS INTEGER) * _s32.page_rank + ) AS REAL) / _s32.n_out + ) OVER (PARTITION BY _s35.s_key) AS page_rank_0, + _s35.s_key + FROM _s32 AS _s32 + JOIN _s1 AS _t44 + ON _s32.anything_s_key = _t44.l_source + JOIN _t38 AS _s35 + ON _s35.s_key = _t44.l_target OR _t44.l_target IS NULL +), _t_6 AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t_6 +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_b0_sqlite.sql b/tests/test_sql_refsols/pagerank_b0_sqlite.sql new file mode 100644 index 000000000..4fe8f22be --- /dev/null +++ b/tests/test_sql_refsols/pagerank_b0_sqlite.sql @@ -0,0 +1,16 @@ +WITH _s0 AS ( + SELECT + COUNT(*) OVER () AS agg_2, + s_key + FROM main.sites +) +SELECT + MAX(_s0.s_key) AS key, + ROUND(CAST(1.0 AS REAL) / MAX(_s0.agg_2), 5) AS page_rank +FROM _s0 AS _s0 +JOIN main.links AS links + ON _s0.s_key = links.l_source +GROUP BY + _s0.s_key +ORDER BY + MAX(_s0.s_key) diff --git 
a/tests/test_sql_refsols/pagerank_b1_sqlite.sql b/tests/test_sql_refsols/pagerank_b1_sqlite.sql new file mode 100644 index 000000000..549289485 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_b1_sqlite.sql @@ -0,0 +1,58 @@ +WITH _t8 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t8 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t9 + ON _s2.anything_s_key = _t9.l_source + JOIN _t8 AS _s5 + ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_b3_sqlite.sql b/tests/test_sql_refsols/pagerank_b3_sqlite.sql new file mode 100644 index 000000000..00086a1c7 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_b3_sqlite.sql @@ -0,0 +1,144 @@ +WITH _t20 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t20 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS 
page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t15 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t21.l_source <> _t21.l_target OR _t21.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t21 + ON _s2.anything_s_key = _t21.l_source + JOIN _t20 AS _s5 + ON _s5.s_key = _t21.l_target OR _t21.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t15 +), _s8 AS ( + SELECT + COALESCE( + SUM( + IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_n) AS anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t AS _t + JOIN _s1 AS _s7 + ON _s7.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t9 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _s8.page_rank + ) AS REAL) / _s8.n_out + ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, + _s8.anything_anything_n, + _s11.s_key + FROM _s8 AS _s8 + JOIN _s1 AS _t22 + ON _s8.anything_s_key = _t22.l_source + JOIN _t20 AS _s11 + ON _s11.s_key = _t22.l_target OR _t22.l_target IS NULL +), _t_2 AS ( + SELECT + page_rank_0, + anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t9 +), _s14 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s13.l_target IS NULL, + _t.anything_anything_n, + CAST(_s13.l_source <> _s13.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_n) AS 
anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_2 AS _t + JOIN _s1 AS _s13 + ON _s13.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _s14.page_rank + ) AS REAL) / _s14.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, + _s17.s_key + FROM _s14 AS _s14 + JOIN _s1 AS _t23 + ON _s14.anything_s_key = _t23.l_source + JOIN _t20 AS _s17 + ON _s17.s_key = _t23.l_target OR _t23.l_target IS NULL +), _t_3 AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t_3 +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_c4_sqlite.sql b/tests/test_sql_refsols/pagerank_c4_sqlite.sql new file mode 100644 index 000000000..591e82f3d --- /dev/null +++ b/tests/test_sql_refsols/pagerank_c4_sqlite.sql @@ -0,0 +1,189 @@ +WITH _t26 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t26 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t21 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t27.l_source <> _t27.l_target OR _t27.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t27 + ON _s2.anything_s_key = 
_t27.l_source + JOIN _t26 AS _s5 + ON _s5.s_key = _t27.l_target OR _t27.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t21 +), _s8 AS ( + SELECT + COALESCE( + SUM( + IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_n) AS anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t AS _t + JOIN _s1 AS _s7 + ON _s7.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t15 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t28.l_source <> _t28.l_target OR _t28.l_target IS NULL AS INTEGER) * _s8.page_rank + ) AS REAL) / _s8.n_out + ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, + _s8.anything_anything_n, + _s11.s_key + FROM _s8 AS _s8 + JOIN _s1 AS _t28 + ON _s8.anything_s_key = _t28.l_source + JOIN _t26 AS _s11 + ON _s11.s_key = _t28.l_target OR _t28.l_target IS NULL +), _t_2 AS ( + SELECT + page_rank_0, + anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t15 +), _s14 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s13.l_target IS NULL, + _t.anything_anything_n, + CAST(_s13.l_source <> _s13.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_n) AS anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_2 AS _t + JOIN _s1 AS _s13 + ON _s13.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t9 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t29.l_source <> _t29.l_target OR _t29.l_target IS NULL AS INTEGER) * _s14.page_rank + ) AS REAL) / _s14.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, + _s14.anything_anything_anything_n, + _s17.s_key + FROM _s14 AS _s14 
+ JOIN _s1 AS _t29 + ON _s14.anything_s_key = _t29.l_source + JOIN _t26 AS _s17 + ON _s17.s_key = _t29.l_target OR _t29.l_target IS NULL +), _t_3 AS ( + SELECT + page_rank_0, + anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t9 +), _s20 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s19.l_target IS NULL, + _t.anything_anything_anything_n, + CAST(_s19.l_source <> _s19.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_3 AS _t + JOIN _s1 AS _s19 + ON _s19.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t30.l_source <> _t30.l_target OR _t30.l_target IS NULL AS INTEGER) * _s20.page_rank + ) AS REAL) / _s20.n_out + ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, + _s23.s_key + FROM _s20 AS _s20 + JOIN _s1 AS _t30 + ON _s20.anything_s_key = _t30.l_source + JOIN _t26 AS _s23 + ON _s23.s_key = _t30.l_target OR _t30.l_target IS NULL +), _t_4 AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t_4 +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_d1_sqlite.sql b/tests/test_sql_refsols/pagerank_d1_sqlite.sql new file mode 100644 index 000000000..549289485 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_d1_sqlite.sql @@ -0,0 +1,58 @@ +WITH _t8 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t8 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + 
CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t9 + ON _s2.anything_s_key = _t9.l_source + JOIN _t8 AS _s5 + ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_d5_sqlite.sql b/tests/test_sql_refsols/pagerank_d5_sqlite.sql new file mode 100644 index 000000000..09af3379b --- /dev/null +++ b/tests/test_sql_refsols/pagerank_d5_sqlite.sql @@ -0,0 +1,234 @@ +WITH _t32 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t32 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t27 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t33.l_source <> _t33.l_target OR _t33.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t33 + ON _s2.anything_s_key = _t33.l_source + JOIN 
_t32 AS _s5 + ON _s5.s_key = _t33.l_target OR _t33.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t27 +), _s8 AS ( + SELECT + COALESCE( + SUM( + IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_n) AS anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t AS _t + JOIN _s1 AS _s7 + ON _s7.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t21 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t34.l_source <> _t34.l_target OR _t34.l_target IS NULL AS INTEGER) * _s8.page_rank + ) AS REAL) / _s8.n_out + ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, + _s8.anything_anything_n, + _s11.s_key + FROM _s8 AS _s8 + JOIN _s1 AS _t34 + ON _s8.anything_s_key = _t34.l_source + JOIN _t32 AS _s11 + ON _s11.s_key = _t34.l_target OR _t34.l_target IS NULL +), _t_2 AS ( + SELECT + page_rank_0, + anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t21 +), _s14 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s13.l_target IS NULL, + _t.anything_anything_n, + CAST(_s13.l_source <> _s13.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_n) AS anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_2 AS _t + JOIN _s1 AS _s13 + ON _s13.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t15 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t35.l_source <> _t35.l_target OR _t35.l_target IS NULL AS INTEGER) * _s14.page_rank + ) AS REAL) / _s14.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, + _s14.anything_anything_anything_n, + _s17.s_key + FROM _s14 AS _s14 + JOIN _s1 AS _t35 
+ ON _s14.anything_s_key = _t35.l_source + JOIN _t32 AS _s17 + ON _s17.s_key = _t35.l_target OR _t35.l_target IS NULL +), _t_3 AS ( + SELECT + page_rank_0, + anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t15 +), _s20 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s19.l_target IS NULL, + _t.anything_anything_anything_n, + CAST(_s19.l_source <> _s19.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_3 AS _t + JOIN _s1 AS _s19 + ON _s19.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t9 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t36.l_source <> _t36.l_target OR _t36.l_target IS NULL AS INTEGER) * _s20.page_rank + ) AS REAL) / _s20.n_out + ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, + _s20.anything_anything_anything_anything_n, + _s23.s_key + FROM _s20 AS _s20 + JOIN _s1 AS _t36 + ON _s20.anything_s_key = _t36.l_source + JOIN _t32 AS _s23 + ON _s23.s_key = _t36.l_target OR _t36.l_target IS NULL +), _t_4 AS ( + SELECT + page_rank_0, + anything_anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t9 +), _s26 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s25.l_target IS NULL, + _t.anything_anything_anything_anything_n, + CAST(_s25.l_source <> _s25.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_4 AS _t + JOIN _s1 AS _s25 + ON _s25.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s26.anything_anything_anything_anything_anything_n + ) + 0.85 * 
SUM( + CAST(( + CAST(_t37.l_source <> _t37.l_target OR _t37.l_target IS NULL AS INTEGER) * _s26.page_rank + ) AS REAL) / _s26.n_out + ) OVER (PARTITION BY _s29.s_key) AS page_rank_0, + _s29.s_key + FROM _s26 AS _s26 + JOIN _s1 AS _t37 + ON _s26.anything_s_key = _t37.l_source + JOIN _t32 AS _s29 + ON _s29.s_key = _t37.l_target OR _t37.l_target IS NULL +), _t_5 AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t_5 +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_unqualified_node.py b/tests/test_unqualified_node.py index 1e04d6d29..0baebad45 100644 --- a/tests/test_unqualified_node.py +++ b/tests/test_unqualified_node.py @@ -569,7 +569,7 @@ def test_unqualified_to_string( ], ) def test_init_pydough_context( - func: Callable[[], UnqualifiedNode], + func: Callable[..., UnqualifiedNode], as_string: str, get_sample_graph: graph_fetcher, ) -> None: @@ -579,7 +579,7 @@ def test_init_pydough_context( at least based on string representation. """ sample_graph: GraphMetadata = get_sample_graph("TPCH") - new_func: Callable[[], UnqualifiedNode] = init_pydough_context(sample_graph)(func) + new_func: Callable[..., UnqualifiedNode] = init_pydough_context(sample_graph)(func) answer: UnqualifiedNode = new_func() assert repr(answer) == as_string, ( "Mismatch between string representation of unqualified nodes and expected output" @@ -748,7 +748,7 @@ def test_init_pydough_context( ], ) def test_unqualified_errors( - func: Callable[[], UnqualifiedNode], + func: Callable[..., UnqualifiedNode], error_msg: str, get_sample_graph: graph_fetcher, ) -> None: @@ -757,6 +757,6 @@ def test_unqualified_errors( exception during the conversion to unqualified nodes. 
""" sample_graph: GraphMetadata = get_sample_graph("TPCH") - new_func: Callable[[], UnqualifiedNode] = init_pydough_context(sample_graph)(func) + new_func: Callable[..., UnqualifiedNode] = init_pydough_context(sample_graph)(func) with pytest.raises(Exception, match=error_msg): new_func() diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index 8c55689b2..d95d35562 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -904,8 +904,9 @@ def make_relational_ordering( def transform_and_exec_pydough( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode], graph: GraphMetadata, + args: list[Any] | None, ) -> UnqualifiedNode: """ Obtains the unqualified node from a PyDough function by invoking the @@ -914,12 +915,14 @@ def transform_and_exec_pydough( Args: `pydough_impl`: The PyDough function to be transformed and executed. `graph`: The metadata being used. + `args`: The arguments to pass to the PyDough function, if any. Returns: The unqualified node created by running the transformed version of `pydough_impl`. """ - return init_pydough_context(graph)(pydough_impl)() + args = args if args is not None else [] + return init_pydough_context(graph)(pydough_impl)(*args) @dataclass @@ -930,7 +933,7 @@ class PyDoughSQLComparisonTest: SQL query. """ - pydough_function: Callable[[], UnqualifiedNode] + pydough_function: Callable[..., UnqualifiedNode] """ Function that returns the PyDough code evaluated by the unit test. 
""" @@ -988,7 +991,9 @@ def run_e2e_test( """ # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) - root: UnqualifiedNode = transform_and_exec_pydough(self.pydough_function, graph) + root: UnqualifiedNode = transform_and_exec_pydough( + self.pydough_function, graph, None + ) # Obtain the DataFrame result from the PyDough code call_kwargs: dict = {"metadata": graph, "database": database} @@ -1037,9 +1042,10 @@ class PyDoughPandasTest: - `fix_column_names` (optional): if True, ignore whatever column names are in the output and just use the same column names as in the reference solution. + - `args` (optional): additional arguments to pass to the PyDough function. """ - pydough_function: Callable[[], UnqualifiedNode] + pydough_function: Callable[..., UnqualifiedNode] """ Function that returns the PyDough code evaluated by the unit test. """ @@ -1078,6 +1084,12 @@ class PyDoughPandasTest: same column names as in the reference solution. """ + args: list[Any] | None = None + """ + Any additional arguments to pass to the PyDough function when + executing it. If None, no additional arguments are passed. + """ + def run_relational_test( self, fetcher: graph_fetcher, @@ -1101,7 +1113,9 @@ def run_relational_test( """ # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) - root: UnqualifiedNode = transform_and_exec_pydough(self.pydough_function, graph) + root: UnqualifiedNode = transform_and_exec_pydough( + self.pydough_function, graph, self.args + ) # Run the PyDough code through the pipeline up until it is converted to # a relational plan. 
@@ -1153,7 +1167,9 @@ def run_sql_test( """ # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) - root: UnqualifiedNode = transform_and_exec_pydough(self.pydough_function, graph) + root: UnqualifiedNode = transform_and_exec_pydough( + self.pydough_function, graph, self.args + ) # Convert the PyDough code to SQL text call_kwargs: dict = {"metadata": graph, "database": database} @@ -1196,7 +1212,9 @@ def run_e2e_test( """ # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) - root: UnqualifiedNode = transform_and_exec_pydough(self.pydough_function, graph) + root: UnqualifiedNode = transform_and_exec_pydough( + self.pydough_function, graph, self.args + ) # Obtain the DataFrame result from the PyDough code call_kwargs: dict = { @@ -1229,7 +1247,7 @@ def run_e2e_test( def run_e2e_error_test( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode], error_message: str, graph: GraphMetadata, columns: dict[str, str] | list[str] | None = None, @@ -1250,7 +1268,7 @@ def run_e2e_error_test( `config`: The PyDough configuration to use for the test, if any. 
""" with pytest.raises(Exception, match=error_message): - root: UnqualifiedNode = transform_and_exec_pydough(pydough_impl, graph) + root: UnqualifiedNode = transform_and_exec_pydough(pydough_impl, graph, None) call_kwargs: dict = {} if graph is not None: call_kwargs["metadata"] = graph From b121c313b16eb49bbcce082714e056dd7f144dd7 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 14:40:40 -0400 Subject: [PATCH 028/143] Started adding comments --- tests/conftest.py | 14 +++++-- tests/gen_data/init_pagerank.sql | 40 ++++--------------- .../simple_pydough_functions.py | 15 +++++++ 3 files changed, 34 insertions(+), 35 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 44de583c5..bbe8cd7d9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -525,18 +525,26 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: "INSERT INTO LINKS VALUES (?, ?)", (site + 1, site + 1), ) - no_links: set[int] = set(range(1, nodes + 1)) + no_incoming: set[int] = set(range(1, nodes + 1)) + no_outgoing: set[int] = set(range(1, nodes + 1)) for src, dst in vertices: - no_links.discard(src) + no_outgoing.discard(src) + no_incoming.discard(dst) cursor.execute( "INSERT INTO LINKS VALUES (?, ?)", (src, dst), ) - for site in no_links: + for site in no_outgoing: cursor.execute( "INSERT INTO LINKS VALUES (?, ?)", (site, None), ) + if len(no_outgoing) == 0: + for site in no_incoming: + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site, site), + ) cursor.connection.commit() result[name] = DatabaseContext( DatabaseConnection(connection), DatabaseDialect.SQLITE diff --git a/tests/gen_data/init_pagerank.sql b/tests/gen_data/init_pagerank.sql index cb6758196..503d6fd9b 100644 --- a/tests/gen_data/init_pagerank.sql +++ b/tests/gen_data/init_pagerank.sql @@ -1,4 +1,11 @@ --- TODO +-- Custom SQL schema to initialize a custom PageRank database with tables for +-- web sites and links between them. The following assumptions are made: +-- 1. 
Websites without any outgoing links have an edge (key, NULL) in the LINKS +-- table, to denote that the page implicitly links to all other pages. +-- 2. If there are no websites without any outgoing links, then any websites +-- without incoming links have a dummy self-link for simplicity, which +-- should not be counted in the PageRank calculation (but is required for +-- joins to work). CREATE TABLE SITES ( s_key INTEGER NOT NULL, @@ -9,34 +16,3 @@ CREATE TABLE LINKS ( l_source INTEGER NOT NULL, l_target INTEGER ); - --- INSERT INTO SITES (s_key, s_name) VALUES --- (1, 'Site A'), --- (2, 'Site B'), --- (3, 'Site C'), --- (4, 'Site D'), --- (5, 'Site E') --- ; - --- INSERT INTO LINKS (l_source, l_target) VALUES --- (1, 2), (1, 3), (1, 4), (1, 5), --- (2, 1), (2, 3), --- (3, NULL), --- (4, 1), (4, 2), (4, 3), --- (5, 1), (5, 4) --- ; - --- INSERT INTO SITES (s_key, s_name) VALUES --- (1, 'Site A'), --- (2, 'Site B'), --- (3, 'Site C'), --- (4, 'Site D') --- ; - --- INSERT INTO LINKS (l_source, l_target) VALUES --- (1, 2), --- (2, 1), (2, 3), --- (3, 4), --- (4, 1), (4, 2) --- ; - diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 250faeee6..107106ad9 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -3033,6 +3033,21 @@ def quantile_function_test_4(): def pagerank(n_iters): + """ + Computes the PageRank computation on the PAGERANK graph, starting with the + base page_rank values with an even distribution of 1.0 / n, where n is the + number of sites in the graph, then iteratively updates the page_rank values + based on the outgoing links and the damping factor d. Repeats the process + for n_iters iterations, returning the final page_rank values for each site, + rounded to 5 decimal places. 
Makes the following assumptions: + + - If a site has no outgoing links, then it has a single entry in + `outgoing_links` where `target_key` is null. + - If there is a site with no incoming links, and there are no sites w/o + any outgoing links, the site w/o the incoming link has a dummy link where + the source & target key are the same, which should be ignored in the + PageRank calculation. + """ d = 0.85 n_out_expr = SUM( outgoing_links.CALCULATE( From 05f71471d86e59928ae729384dfc2d69e6d9d983 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 14:40:54 -0400 Subject: [PATCH 029/143] Started adding comments --- tests/gen_data/init_pagerank.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gen_data/init_pagerank.sql b/tests/gen_data/init_pagerank.sql index 503d6fd9b..c94ba5957 100644 --- a/tests/gen_data/init_pagerank.sql +++ b/tests/gen_data/init_pagerank.sql @@ -5,7 +5,7 @@ -- 2. If there are no websites without any outgoing links, then any websites -- without incoming links have a dummy self-link for simplicity, which -- should not be counted in the PageRank calculation (but is required for --- joins to work). +-- joins to work). CREATE TABLE SITES ( s_key INTEGER NOT NULL, From 2697b108ee2827867cb712b1176d54c212baeff1 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 15:48:30 -0400 Subject: [PATCH 030/143] Added comments --- tests/conftest.py | 35 +++++++++++++++---- tests/gen_data/gen_technograph.py | 1 + tests/test_pipeline_pagerank.py | 3 +- .../simple_pydough_functions.py | 19 ++++++++++ 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index bbe8cd7d9..1369621fb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -430,7 +430,9 @@ def sqlite_technograph_connection() -> DatabaseContext: @pytest.fixture(scope="session") def get_pagerank_graph() -> graph_fetcher: """ - A function that returns the graph used for PageRank calculations. 
+ A function that returns the graph used for PageRank calculations. The same + graph is used for all PageRank tests, but different databases are used that + adhere to the same table schema setup that the graph invokes. """ @cache @@ -447,11 +449,18 @@ def impl(name: str) -> GraphMetadata: def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: """ Returns the SQLITE database contexts for the various pagerank database. + This is returned as a dictionary mapping the name of the database to the + DatabaseContext for that database, all of which adhere to the same + schema structure assumed by the PAGERANK graph. """ # Setup the directory to be the main PyDough directory. base_dir: str = os.path.dirname(os.path.dirname(__file__)) - # Outputs verfied via https://pagerank-visualizer.netlify.app/ + # The configurations for the pagerank databases. Each tuple contains: + # - The name of the database. + # - The number of nodes n in the graph. + # - The edges in the graph as a list of tuples (src, dst), assuming the + # nodes are numbered from 1 to n. pagerank_configs = [ ("PAGERANK_A", 4, [(1, 2), (2, 1), (2, 3), (3, 4), (4, 1), (4, 2)]), ("PAGERANK_B", 5, [(1, 2), (2, 1), (2, 5), (3, 2), (4, 2), (4, 5), (5, 3)]), @@ -506,9 +515,10 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: ), ] - # Setup the pagerank databases. + # Setup each of the the pagerank databases using the configurations. result: dict[str, DatabaseContext] = {} for name, nodes, vertices in pagerank_configs: + # Create the database and ensure it is empty. 
subprocess.run( f"cd tests; rm -fv gen_data/{name.lower()}.db; sqlite3 gen_data/{name.lower()}.db < gen_data/init_pagerank.sql", shell=True, @@ -516,15 +526,16 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: path: str = os.path.join(base_dir, f"tests/gen_data/{name.lower()}.db") connection: sqlite3.Connection = sqlite3.connect(path) cursor: sqlite3.Cursor = connection.cursor() + + # For every node, insert an entry into the SITES table. for site in range(nodes): cursor.execute( "INSERT INTO SITES VALUES (?, ?)", (site + 1, f"SITE {chr(ord('A') + site)}"), ) - cursor.execute( - "INSERT INTO LINKS VALUES (?, ?)", - (site + 1, site + 1), - ) + + # For every edge, insert an entry into the LINKS table. Keep track of + # the nodes that have no incoming or outgoing links. no_incoming: set[int] = set(range(1, nodes + 1)) no_outgoing: set[int] = set(range(1, nodes + 1)) for src, dst in vertices: @@ -534,18 +545,28 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: "INSERT INTO LINKS VALUES (?, ?)", (src, dst), ) + + # If there are no outgoing links for a site, insert a NULL link for it, + # indicating that the site links to ALL sites. for site in no_outgoing: cursor.execute( "INSERT INTO LINKS VALUES (?, ?)", (site, None), ) + + # IF there are no nodes without outgoing links, then for each node + # without incoming links, insert a dummy link to itself. if len(no_outgoing) == 0: for site in no_incoming: cursor.execute( "INSERT INTO LINKS VALUES (?, ?)", (site, site), ) + + # Commit the changes, close the cursor, and store the context in the + # result dictionary. 
cursor.connection.commit() + cursor.close() result[name] = DatabaseContext( DatabaseConnection(connection), DatabaseDialect.SQLITE ) diff --git a/tests/gen_data/gen_technograph.py b/tests/gen_data/gen_technograph.py index 75ca6cdfd..521ee7861 100644 --- a/tests/gen_data/gen_technograph.py +++ b/tests/gen_data/gen_technograph.py @@ -827,3 +827,4 @@ def gen_technograph_records(cursor: sqlite3.Cursor) -> None: ) cursor.connection.commit() + cursor.close() diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index ead18dd81..7f1eeb6f8 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -228,7 +228,8 @@ def test_pipeline_until_sql_pagerank( update_tests: bool, ) -> None: """ - Verifies the generated SQL for the pagerank tests. + Verifies the generated SQL for the pagerank tests. The outputs were + generated using this website: https://pagerank-visualizer.netlify.app/. """ ctx: DatabaseContext = sqlite_pagerank_db_contexts[ pagerank_pipeline_test_data.graph_name diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 107106ad9..df2aa17f9 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -3048,15 +3048,31 @@ def pagerank(n_iters): the source & target key are the same, which should be ignored in the PageRank calculation. """ + + # The dampening factor d = 0.85 + + # The expression used to determine the number of sites the graph links to, + # accounting for sites without links (which implicitly link to everything) + # and sites with a dummy link to themselves (which should be ignored). n_out_expr = SUM( outgoing_links.CALCULATE( n_target=IFF(ABSENT(target_key), n, INTEGER((source_key != target_key))) ).n_target ) + + # The seed value for the PageRank computation, which is evenly distributed. 
+ # Also computes the number of sites in the graph, which is used downstream. source = sites.CALCULATE(n=RELSIZE()).CALCULATE(page_rank=1.0 / n, n_out=n_out_expr) + + # Repeats the following procedure for n_iters iterations to build the next + # generation of PageRank values from the current generation. for i in range(n_iters): group_name = f"s{i}" + # For each site, find all sites that it links to and accumulate the + # PageRank values from the current site (divided by the # of links) in + # those linked sites, while also considering the damping factor. Calls + # .BEST() to ensure each site is included exactly once at the end. source = ( source.outgoing_links.CALCULATE( consider_link=INTEGER(ABSENT(target_key) | (source_key != target_key)) @@ -3069,6 +3085,9 @@ def pagerank(n_iters): ) .BEST(per=group_name, by=key.ASC()) ) + # Unless we are done, re-derive `n_out` for the current node. if i < n_iters - 1: source = source.CALCULATE(n_out=n_out_expr) + + # Output the final PageRank values, rounded to 5 decimal places, return source.CALCULATE(key, page_rank=ROUND(page_rank, 5)).ORDER_BY(key.ASC()) From 94726ff80bb55e797d43432c29c0c00002d8ecae Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 16:10:23 -0400 Subject: [PATCH 031/143] Fixing c4 test and refactoring the PageRank impl to be simpler & more performant with fewer window functions [RUN CI] --- tests/conftest.py | 18 +- tests/gen_data/init_pagerank.sql | 6 +- tests/test_pipeline_pagerank.py | 25 ++ tests/test_plan_refsols/pagerank_a1.txt | 10 +- tests/test_plan_refsols/pagerank_a2.txt | 45 ++- tests/test_plan_refsols/pagerank_a6.txt | 113 +++---- tests/test_plan_refsols/pagerank_b1.txt | 10 +- tests/test_plan_refsols/pagerank_b3.txt | 58 ++-- tests/test_plan_refsols/pagerank_c4.txt | 79 ++--- tests/test_plan_refsols/pagerank_d1.txt | 10 +- tests/test_plan_refsols/pagerank_d5.txt | 92 +++--- .../simple_pydough_functions.py | 25 +- tests/test_sql_refsols/pagerank_a1_sqlite.sql | 27 +- 
tests/test_sql_refsols/pagerank_a2_sqlite.sql | 80 ++--- tests/test_sql_refsols/pagerank_a6_sqlite.sql | 300 +++++------------- tests/test_sql_refsols/pagerank_b1_sqlite.sql | 27 +- tests/test_sql_refsols/pagerank_b3_sqlite.sql | 135 +++----- tests/test_sql_refsols/pagerank_c4_sqlite.sql | 190 ++++------- tests/test_sql_refsols/pagerank_d1_sqlite.sql | 27 +- tests/test_sql_refsols/pagerank_d5_sqlite.sql | 245 +++++--------- 20 files changed, 537 insertions(+), 985 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1369621fb..ec28ed893 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -535,12 +535,10 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: ) # For every edge, insert an entry into the LINKS table. Keep track of - # the nodes that have no incoming or outgoing links. - no_incoming: set[int] = set(range(1, nodes + 1)) + # the nodes that have no outgoing links. no_outgoing: set[int] = set(range(1, nodes + 1)) for src, dst in vertices: no_outgoing.discard(src) - no_incoming.discard(dst) cursor.execute( "INSERT INTO LINKS VALUES (?, ?)", (src, dst), @@ -554,14 +552,12 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: (site, None), ) - # IF there are no nodes without outgoing links, then for each node - # without incoming links, insert a dummy link to itself. - if len(no_outgoing) == 0: - for site in no_incoming: - cursor.execute( - "INSERT INTO LINKS VALUES (?, ?)", - (site, site), - ) + # Insert a dummy self-link for every site. + for site in range(1, nodes + 1): + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site, site), + ) # Commit the changes, close the cursor, and store the context in the # result dictionary. diff --git a/tests/gen_data/init_pagerank.sql b/tests/gen_data/init_pagerank.sql index c94ba5957..b9de0181d 100644 --- a/tests/gen_data/init_pagerank.sql +++ b/tests/gen_data/init_pagerank.sql @@ -2,10 +2,8 @@ -- web sites and links between them. 
The following assumptions are made: -- 1. Websites without any outgoing links have an edge (key, NULL) in the LINKS -- table, to denote that the page implicitly links to all other pages. --- 2. If there are no websites without any outgoing links, then any websites --- without incoming links have a dummy self-link for simplicity, which --- should not be counted in the PageRank calculation (but is required for --- joins to work). +-- 2. Every website has a self-link (key, key) in the LINKS table, which should +-- be ignored in PageRank calculations. CREATE TABLE SITES ( s_key INTEGER NOT NULL, diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index 7f1eeb6f8..c8a846e5f 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -128,6 +128,31 @@ ), id="pagerank_b3", ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_C", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4, 5, 6, 7, 8], + "page_rank": [ + 0.08996, + 0.19353, + 0.11764, + 0.03252, + 0.10377, + 0.12682, + 0.16788, + 0.16788, + ], + } + ), + "pagerank_c4", + order_sensitive=True, + args=[4], + ), + id="pagerank_c4", + ), pytest.param( PyDoughPandasTest( pagerank, diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt index 28da857bf..b3053e78b 100644 --- a/tests/test_plan_refsols/pagerank_a1.txt +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -1,9 +1,9 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) @@ -11,6 +11,6 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):as PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': 
l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt index d79575279..16646f254 100644 --- a/tests/test_plan_refsols/pagerank_a2.txt +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -1,28 +1,23 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, 
aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': 
t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_20, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_18, columns={'page_rank_0_20': page_rank_0_20, 's_key': s_key}) + PROJECT(columns={'dummy_link_18': dummy_link_18, 'page_rank_0_20': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_19 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_19': t0.consider_link_19, 'dummy_link_18': t0.dummy_link_18, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_19': t1.consider_link_19, 'dummy_link_18': t1.dummy_link_18, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * 
RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_19': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_18': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 
'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt index c1eec12ef..f29c64b32 100644 --- a/tests/test_plan_refsols/pagerank_a6.txt +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -1,76 +1,51 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_anything_n), 'anything_page_rank_0': 
ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 
'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 
'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': 
t0.page_rank}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 
anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_590, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_588, columns={'page_rank_0_590': page_rank_0_590, 's_key': s_key}) + PROJECT(columns={'dummy_link_588': dummy_link_588, 'page_rank_0_590': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_589 * page_rank_0_580 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_589': t0.consider_link_589, 'dummy_link_588': t0.dummy_link_588, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_589': t1.consider_link_589, 'dummy_link_588': t1.dummy_link_588, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580}) + FILTER(condition=dummy_link_578, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_580': page_rank_0_580, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_578': dummy_link_578, 'n_out': n_out, 'page_rank_0_580': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_579 * page_rank_0_570 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_579': t0.consider_link_579, 'dummy_link_578': t0.dummy_link_578, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_579': t1.consider_link_579, 'dummy_link_578': t1.dummy_link_578, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570}) + FILTER(condition=dummy_link_568, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_570': page_rank_0_570, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_568': dummy_link_568, 'n_out': n_out, 'page_rank_0_570': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_569 * page_rank_0_560 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_569': t0.consider_link_569, 'dummy_link_568': t0.dummy_link_568, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_569': t1.consider_link_569, 'dummy_link_568': t1.dummy_link_568, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560}) + FILTER(condition=dummy_link_558, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_560': page_rank_0_560, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_558': dummy_link_558, 'n_out': n_out, 'page_rank_0_560': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_559 * page_rank_0_550 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_559': t0.consider_link_559, 'dummy_link_558': t0.dummy_link_558, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_559': t1.consider_link_559, 'dummy_link_558': t1.dummy_link_558, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550}) + FILTER(condition=dummy_link_548, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_550': page_rank_0_550, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_548': dummy_link_548, 'n_out': n_out, 'page_rank_0_550': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_549 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_549': t0.consider_link_549, 'dummy_link_548': t0.dummy_link_548, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_549': t1.consider_link_549, 'dummy_link_548': t1.dummy_link_548, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) 
+ PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_549': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_548': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_559': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_558': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_569': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_568': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_579': INTEGER(ABSENT(l_target) | l_source != 
l_target), 'dummy_link_578': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_589': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_588': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_b1.txt b/tests/test_plan_refsols/pagerank_b1.txt index 28da857bf..b3053e78b 100644 --- a/tests/test_plan_refsols/pagerank_b1.txt +++ b/tests/test_plan_refsols/pagerank_b1.txt @@ -1,9 +1,9 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 
0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) @@ -11,6 +11,6 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):as PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt index e02382e76..d78322a4d 100644 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ 
b/tests/test_plan_refsols/pagerank_b3.txt @@ -1,40 +1,30 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': 
t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 
'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_58, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_56, columns={'page_rank_0_58': page_rank_0_58, 's_key': s_key}) + PROJECT(columns={'dummy_link_56': dummy_link_56, 'page_rank_0_58': 0.15000000000000002:numeric / 
anything_n + 0.85:numeric * RELSUM(args=[consider_link_57 * page_rank_0_48 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t0.consider_link_57, 'dummy_link_56': t0.dummy_link_56, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t1.consider_link_57, 'dummy_link_56': t1.dummy_link_56, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48}) + FILTER(condition=dummy_link_46, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_48': page_rank_0_48, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_46': dummy_link_46, 'n_out': n_out, 'page_rank_0_48': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_47 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t0.consider_link_47, 'dummy_link_46': t0.dummy_link_46, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t1.consider_link_47, 'dummy_link_46': t1.dummy_link_46, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / 
n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != 
l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_47': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_46': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_57': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_56': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt index f063615ae..1c504e853 100644 --- a/tests/test_plan_refsols/pagerank_c4.txt +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -1,52 +1,37 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': 
t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': 
t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': 
anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) 
- JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_134, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_132, columns={'page_rank_0_134': page_rank_0_134, 's_key': s_key}) + PROJECT(columns={'dummy_link_132': dummy_link_132, 'page_rank_0_134': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_133 * page_rank_0_124 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_133': t0.consider_link_133, 'dummy_link_132': t0.dummy_link_132, 'n_out': t0.n_out, 'page_rank_0_124': t0.page_rank_0_124, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'anything_n': t0.anything_n, 'consider_link_133': t1.consider_link_133, 'dummy_link_132': t1.dummy_link_132, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_124': t0.page_rank_0_124}) + FILTER(condition=dummy_link_122, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_124': page_rank_0_124, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_122': dummy_link_122, 'n_out': n_out, 'page_rank_0_124': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_123 * page_rank_0_114 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_123': t0.consider_link_123, 'dummy_link_122': t0.dummy_link_122, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_123': t1.consider_link_123, 'dummy_link_122': t1.dummy_link_122, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114}) + FILTER(condition=dummy_link_112, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_114': page_rank_0_114, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_112': dummy_link_112, 'n_out': n_out, 'page_rank_0_114': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_113 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_113': t0.consider_link_113, 'dummy_link_112': t0.dummy_link_112, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'anything_n': t0.anything_n, 'consider_link_113': t1.consider_link_113, 'dummy_link_112': t1.dummy_link_112, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != 
l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_113': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_112': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_123': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_122': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_133': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_132': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d1.txt b/tests/test_plan_refsols/pagerank_d1.txt index 28da857bf..b3053e78b 100644 --- a/tests/test_plan_refsols/pagerank_d1.txt +++ b/tests/test_plan_refsols/pagerank_d1.txt @@ -1,9 +1,9 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - 
PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) @@ -11,6 +11,6 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], 
orderings=[(s_key):as PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt index a3ca6ed77..fb3b3c239 100644 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -1,64 +1,44 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 
'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key 
== t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - 
JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': 
t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 
'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_286, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_284, columns={'page_rank_0_286': page_rank_0_286, 's_key': s_key}) + PROJECT(columns={'dummy_link_284': dummy_link_284, 'page_rank_0_286': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_285 * page_rank_0_276 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t0.consider_link_285, 'dummy_link_284': t0.dummy_link_284, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': 
t0.anything_n, 'consider_link_285': t1.consider_link_285, 'dummy_link_284': t1.dummy_link_284, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276}) + FILTER(condition=dummy_link_274, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_276': page_rank_0_276, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_274': dummy_link_274, 'n_out': n_out, 'page_rank_0_276': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_275 * page_rank_0_266 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t0.consider_link_275, 'dummy_link_274': t0.dummy_link_274, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t1.consider_link_275, 'dummy_link_274': t1.dummy_link_274, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266}) + FILTER(condition=dummy_link_264, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_266': page_rank_0_266, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_264': dummy_link_264, 'n_out': n_out, 'page_rank_0_266': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_265 * page_rank_0_256 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t0.consider_link_265, 'dummy_link_264': t0.dummy_link_264, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': 
t0.anything_n, 'consider_link_265': t1.consider_link_265, 'dummy_link_264': t1.dummy_link_264, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256}) + FILTER(condition=dummy_link_254, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_256': page_rank_0_256, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_254': dummy_link_254, 'n_out': n_out, 'page_rank_0_256': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_255 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t0.consider_link_255, 'dummy_link_254': t0.dummy_link_254, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t1.consider_link_255, 'dummy_link_254': t1.dummy_link_254, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': 
t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_255': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_254': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_265': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_264': PRESENT(l_target) & 
l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_275': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_274': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_285': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_284': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index df2aa17f9..5fd4aad86 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -3062,32 +3062,35 @@ def pagerank(n_iters): ) # The seed value for the PageRank computation, which is evenly distributed. - # Also computes the number of sites in the graph, which is used downstream. + # Also computes the number of sites in the graph & the number of sites each + # site links to, which are both used downstream. source = sites.CALCULATE(n=RELSIZE()).CALCULATE(page_rank=1.0 / n, n_out=n_out_expr) # Repeats the following procedure for n_iters iterations to build the next # generation of PageRank values from the current generation. 
for i in range(n_iters): - group_name = f"s{i}" # For each site, find all sites that it links to and accumulate the # PageRank values from the current site (divided by the # of links) in - # those linked sites, while also considering the damping factor. Calls - # .BEST() to ensure each site is included exactly once at the end. + # those linked sites, while also considering the damping factor. Uses + # RELSUM after partitioning on the destination site to perform the + # accumulation, then filters to only keep the one row of the + # destination site that came from the self-link. This ensures that each + # site is included once after each iteration, and the `n_out` value for + # that site is daisy-chained to the next iteration. source = ( source.outgoing_links.CALCULATE( - consider_link=INTEGER(ABSENT(target_key) | (source_key != target_key)) + dummy_link=PRESENT(target_key) & (source_key == target_key), + consider_link=INTEGER(ABSENT(target_key) | (source_key != target_key)), ) - .target_site.PARTITION(name=group_name, by=key) + .target_site.PARTITION(name=f"s{i}", by=key) .target_site.CALCULATE( n, + n_out, page_rank=(1.0 - d) / n - + d * RELSUM(consider_link * page_rank / n_out, per=group_name), + + d * RELSUM(consider_link * page_rank / n_out, per=f"s{i}"), ) - .BEST(per=group_name, by=key.ASC()) + .WHERE(dummy_link) ) - # Unless we are done, re-derive `n_out` for the current node. 
- if i < n_iters - 1: - source = source.CALCULATE(n_out=n_out_expr) # Output the final PageRank values, rounded to 5 decimal places, return source.CALCULATE(key, page_rank=ROUND(page_rank, 5)).ORDER_BY(key.ASC()) diff --git a/tests/test_sql_refsols/pagerank_a1_sqlite.sql b/tests/test_sql_refsols/pagerank_a1_sqlite.sql index 549289485..d7965056f 100644 --- a/tests/test_sql_refsols/pagerank_a1_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a1_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t8 AS ( +WITH _t7 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t8 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t8 + FROM _t7 ), _s1 AS ( SELECT l_source, @@ -26,33 +26,28 @@ WITH _t8 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t3 AS ( +), _t2 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t9 - ON _s2.anything_s_key = _t9.l_source - JOIN _t8 AS _s5 - ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + JOIN _s1 AS _t8 + ON _s2.anything_s_key = _t8.l_source + JOIN _t7 AS _s5 + ON _s5.s_key = _t8.l_target OR _t8.l_target IS NULL ) SELECT s_key AS key, ROUND(page_rank_0, 5) AS page_rank -FROM _t +FROM _t2 WHERE - _w = 1 + dummy_link ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_a2_sqlite.sql b/tests/test_sql_refsols/pagerank_a2_sqlite.sql index 32a7c2048..0b90f0fd8 100644 --- a/tests/test_sql_refsols/pagerank_a2_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a2_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t14 AS ( +WITH _t9 AS ( SELECT s_key FROM 
main.sites @@ -6,7 +6,7 @@ WITH _t14 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t14 + FROM _t9 ), _s1 AS ( SELECT l_source, @@ -26,74 +26,48 @@ WITH _t14 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t9 AS ( +), _t4 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t15.l_source <> _t15.l_target OR _t15.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t10.l_source <> _t10.l_target OR _t10.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, _s2.anything_n, + NOT _t10.l_target IS NULL AND _t10.l_source = _t10.l_target AS dummy_link, + _s2.n_out, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t15 - ON _s2.anything_s_key = _t15.l_source - JOIN _t14 AS _s5 - ON _s5.s_key = _t15.l_target OR _t15.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t9 -), _s8 AS ( - SELECT - COALESCE( - SUM( - IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_n) AS anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t AS _t - JOIN _s1 AS _s7 - ON _s7.l_source = _t.s_key - WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t3 AS ( + JOIN _s1 AS _t10 + ON _s2.anything_s_key = _t10.l_source + JOIN _t9 AS _s5 + ON _s5.s_key = _t10.l_target OR _t10.l_target IS NULL +), _t2 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t4.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s8.page_rank - ) AS REAL) / _s8.n_out - ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, - _s11.s_key - FROM _s8 AS _s8 - JOIN _s1 AS _t16 - ON _s8.anything_s_key = _t16.l_source - JOIN _t14 AS _s11 - ON _s11.s_key = _t16.l_target OR _t16.l_target IS NULL -), _t_2 AS ( 
- SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + CAST(_t11.l_source <> _t11.l_target OR _t11.l_target IS NULL AS INTEGER) * _t4.page_rank_0 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_20, + NOT _t11.l_target IS NULL AND _t11.l_source = _t11.l_target AS dummy_link_18, + _s9.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t11 + ON _t11.l_source = _t4.s_key + JOIN _t9 AS _s9 + ON _s9.s_key = _t11.l_target OR _t11.l_target IS NULL + WHERE + _t4.dummy_link ) SELECT s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t_2 + ROUND(page_rank_0_20, 5) AS page_rank +FROM _t2 WHERE - _w = 1 + dummy_link_18 ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_a6_sqlite.sql b/tests/test_sql_refsols/pagerank_a6_sqlite.sql index 9bdfb807c..24a887003 100644 --- a/tests/test_sql_refsols/pagerank_a6_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a6_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t38 AS ( +WITH _t17 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t38 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t38 + FROM _t17 ), _s1 AS ( SELECT l_source, @@ -26,254 +26,128 @@ WITH _t38 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t33 AS ( +), _t12 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t39.l_source <> _t39.l_target OR _t39.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, _s2.anything_n, + NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link, + _s2.n_out, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t39 - ON _s2.anything_s_key = _t39.l_source - JOIN _t38 AS _s5 - ON _s5.s_key = _t39.l_target OR _t39.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t33 -), _s8 
AS ( - SELECT - COALESCE( - SUM( - IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_n) AS anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t AS _t - JOIN _s1 AS _s7 - ON _s7.l_source = _t.s_key - WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t27 AS ( + JOIN _s1 AS _t18 + ON _s2.anything_s_key = _t18.l_source + JOIN _t17 AS _s5 + ON _s5.s_key = _t18.l_target OR _t18.l_target IS NULL +), _t10 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t12.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t40.l_source <> _t40.l_target OR _t40.l_target IS NULL AS INTEGER) * _s8.page_rank - ) AS REAL) / _s8.n_out - ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, - _s8.anything_anything_n, - _s11.s_key - FROM _s8 AS _s8 - JOIN _s1 AS _t40 - ON _s8.anything_s_key = _t40.l_source - JOIN _t38 AS _s11 - ON _s11.s_key = _t40.l_target OR _t40.l_target IS NULL -), _t_2 AS ( - SELECT - page_rank_0, - anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t27 -), _s14 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s13.l_target IS NULL, - _t.anything_anything_n, - CAST(_s13.l_source <> _s13.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_n) AS anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_2 AS _t - JOIN _s1 AS _s13 - ON _s13.l_source = _t.s_key + CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t12.page_rank_0 + ) AS REAL) / _t12.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_550, + _t12.anything_n, + NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_548, + _t12.n_out, + _s9.s_key + FROM _t12 AS _t12 + JOIN _s1 AS _t19 + ON _t12.s_key = _t19.l_source + JOIN _t17 AS _s9 + ON _s9.s_key = _t19.l_target OR _t19.l_target IS 
NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t21 AS ( + _t12.dummy_link +), _t8 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t10.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t41.l_source <> _t41.l_target OR _t41.l_target IS NULL AS INTEGER) * _s14.page_rank - ) AS REAL) / _s14.n_out - ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, - _s14.anything_anything_anything_n, - _s17.s_key - FROM _s14 AS _s14 - JOIN _s1 AS _t41 - ON _s14.anything_s_key = _t41.l_source - JOIN _t38 AS _s17 - ON _s17.s_key = _t41.l_target OR _t41.l_target IS NULL -), _t_3 AS ( - SELECT - page_rank_0, - anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t21 -), _s20 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s19.l_target IS NULL, - _t.anything_anything_anything_n, - CAST(_s19.l_source <> _s19.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_3 AS _t - JOIN _s1 AS _s19 - ON _s19.l_source = _t.s_key + CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t10.page_rank_0_550 + ) AS REAL) / _t10.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_560, + _t10.anything_n, + NOT _t20.l_target IS NULL AND _t20.l_source = _t20.l_target AS dummy_link_558, + _t10.n_out, + _s13.s_key + FROM _t10 AS _t10 + JOIN _s1 AS _t20 + ON _t10.s_key = _t20.l_source + JOIN _t17 AS _s13 + ON _s13.s_key = _t20.l_target OR _t20.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t15 AS ( + _t10.dummy_link_548 +), _t6 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t8.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t42.l_source <> _t42.l_target OR _t42.l_target IS NULL AS INTEGER) * _s20.page_rank - ) AS REAL) / 
_s20.n_out - ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, - _s20.anything_anything_anything_anything_n, - _s23.s_key - FROM _s20 AS _s20 - JOIN _s1 AS _t42 - ON _s20.anything_s_key = _t42.l_source - JOIN _t38 AS _s23 - ON _s23.s_key = _t42.l_target OR _t42.l_target IS NULL -), _t_4 AS ( - SELECT - page_rank_0, - anything_anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t15 -), _s26 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s25.l_target IS NULL, - _t.anything_anything_anything_anything_n, - CAST(_s25.l_source <> _s25.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_4 AS _t - JOIN _s1 AS _s25 - ON _s25.l_source = _t.s_key + CAST(_t21.l_source <> _t21.l_target OR _t21.l_target IS NULL AS INTEGER) * _t8.page_rank_0_560 + ) AS REAL) / _t8.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_570, + _t8.anything_n, + NOT _t21.l_target IS NULL AND _t21.l_source = _t21.l_target AS dummy_link_568, + _t8.n_out, + _s17.s_key + FROM _t8 AS _t8 + JOIN _s1 AS _t21 + ON _t21.l_source = _t8.s_key + JOIN _t17 AS _s17 + ON _s17.s_key = _t21.l_target OR _t21.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t9 AS ( + _t8.dummy_link_558 +), _t4 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s26.anything_anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t6.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t43.l_source <> _t43.l_target OR _t43.l_target IS NULL AS INTEGER) * _s26.page_rank - ) AS REAL) / _s26.n_out - ) OVER (PARTITION BY _s29.s_key) AS page_rank_0, - _s26.anything_anything_anything_anything_anything_n, - _s29.s_key - FROM _s26 AS _s26 - JOIN _s1 AS _t43 - ON _s26.anything_s_key = _t43.l_source - JOIN _t38 AS _s29 - ON _s29.s_key = _t43.l_target OR _t43.l_target IS NULL -), _t_5 AS ( - SELECT - 
page_rank_0, - anything_anything_anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t9 -), _s32 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s31.l_target IS NULL, - _t.anything_anything_anything_anything_anything_n, - CAST(_s31.l_source <> _s31.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_5 AS _t - JOIN _s1 AS _s31 - ON _s31.l_source = _t.s_key + CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _t6.page_rank_0_570 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_580, + _t6.anything_n, + NOT _t22.l_target IS NULL AND _t22.l_source = _t22.l_target AS dummy_link_578, + _t6.n_out, + _s21.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t22 + ON _t22.l_source = _t6.s_key + JOIN _t17 AS _s21 + ON _s21.s_key = _t22.l_target OR _t22.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t3 AS ( + _t6.dummy_link_568 +), _t2 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s32.anything_anything_anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t4.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t44.l_source <> _t44.l_target OR _t44.l_target IS NULL AS INTEGER) * _s32.page_rank - ) AS REAL) / _s32.n_out - ) OVER (PARTITION BY _s35.s_key) AS page_rank_0, - _s35.s_key - FROM _s32 AS _s32 - JOIN _s1 AS _t44 - ON _s32.anything_s_key = _t44.l_source - JOIN _t38 AS _s35 - ON _s35.s_key = _t44.l_target OR _t44.l_target IS NULL -), _t_6 AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _t4.page_rank_0_580 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s25.s_key) AS page_rank_0_590, + NOT _t23.l_target IS NULL AND 
_t23.l_source = _t23.l_target AS dummy_link_588, + _s25.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t23 + ON _t23.l_source = _t4.s_key + JOIN _t17 AS _s25 + ON _s25.s_key = _t23.l_target OR _t23.l_target IS NULL + WHERE + _t4.dummy_link_578 ) SELECT s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t_6 + ROUND(page_rank_0_590, 5) AS page_rank +FROM _t2 WHERE - _w = 1 + dummy_link_588 ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_b1_sqlite.sql b/tests/test_sql_refsols/pagerank_b1_sqlite.sql index 549289485..d7965056f 100644 --- a/tests/test_sql_refsols/pagerank_b1_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_b1_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t8 AS ( +WITH _t7 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t8 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t8 + FROM _t7 ), _s1 AS ( SELECT l_source, @@ -26,33 +26,28 @@ WITH _t8 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t3 AS ( +), _t2 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t9 - ON _s2.anything_s_key = _t9.l_source - JOIN _t8 AS _s5 - ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + JOIN _s1 AS _t8 + ON _s2.anything_s_key = _t8.l_source + JOIN _t7 AS _s5 + ON _s5.s_key = _t8.l_target OR _t8.l_target IS NULL ) SELECT s_key AS key, ROUND(page_rank_0, 5) AS page_rank -FROM _t +FROM _t2 WHERE - _w = 1 + dummy_link ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_b3_sqlite.sql b/tests/test_sql_refsols/pagerank_b3_sqlite.sql index 
00086a1c7..40404439e 100644 --- a/tests/test_sql_refsols/pagerank_b3_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_b3_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t20 AS ( +WITH _t11 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t20 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t20 + FROM _t11 ), _s1 AS ( SELECT l_source, @@ -26,119 +26,68 @@ WITH _t20 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t15 AS ( +), _t6 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t21.l_source <> _t21.l_target OR _t21.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t12.l_source <> _t12.l_target OR _t12.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, _s2.anything_n, + NOT _t12.l_target IS NULL AND _t12.l_source = _t12.l_target AS dummy_link, + _s2.n_out, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t21 - ON _s2.anything_s_key = _t21.l_source - JOIN _t20 AS _s5 - ON _s5.s_key = _t21.l_target OR _t21.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t15 -), _s8 AS ( - SELECT - COALESCE( - SUM( - IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_n) AS anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t AS _t - JOIN _s1 AS _s7 - ON _s7.l_source = _t.s_key - WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t9 AS ( + JOIN _s1 AS _t12 + ON _s2.anything_s_key = _t12.l_source + JOIN _t11 AS _s5 + ON _s5.s_key = _t12.l_target OR _t12.l_target IS NULL +), _t4 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t6.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _s8.page_rank - ) AS REAL) / _s8.n_out - ) OVER (PARTITION BY 
_s11.s_key) AS page_rank_0, - _s8.anything_anything_n, - _s11.s_key - FROM _s8 AS _s8 - JOIN _s1 AS _t22 - ON _s8.anything_s_key = _t22.l_source - JOIN _t20 AS _s11 - ON _s11.s_key = _t22.l_target OR _t22.l_target IS NULL -), _t_2 AS ( - SELECT - page_rank_0, - anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t9 -), _s14 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s13.l_target IS NULL, - _t.anything_anything_n, - CAST(_s13.l_source <> _s13.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_n) AS anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_2 AS _t - JOIN _s1 AS _s13 - ON _s13.l_source = _t.s_key + CAST(_t13.l_source <> _t13.l_target OR _t13.l_target IS NULL AS INTEGER) * _t6.page_rank_0 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_48, + _t6.anything_n, + NOT _t13.l_target IS NULL AND _t13.l_source = _t13.l_target AS dummy_link_46, + _t6.n_out, + _s9.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t13 + ON _t13.l_source = _t6.s_key + JOIN _t11 AS _s9 + ON _s9.s_key = _t13.l_target OR _t13.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t3 AS ( + _t6.dummy_link +), _t2 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t4.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _s14.page_rank - ) AS REAL) / _s14.n_out - ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, - _s17.s_key - FROM _s14 AS _s14 - JOIN _s1 AS _t23 - ON _s14.anything_s_key = _t23.l_source - JOIN _t20 AS _s17 - ON _s17.s_key = _t23.l_target OR _t23.l_target IS NULL -), _t_3 AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _t4.page_rank_0_48 + ) AS REAL) / _t4.n_out 
+ ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_58, + NOT _t14.l_target IS NULL AND _t14.l_source = _t14.l_target AS dummy_link_56, + _s13.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t14 + ON _t14.l_source = _t4.s_key + JOIN _t11 AS _s13 + ON _s13.s_key = _t14.l_target OR _t14.l_target IS NULL + WHERE + _t4.dummy_link_46 ) SELECT s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t_3 + ROUND(page_rank_0_58, 5) AS page_rank +FROM _t2 WHERE - _w = 1 + dummy_link_56 ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_c4_sqlite.sql b/tests/test_sql_refsols/pagerank_c4_sqlite.sql index 591e82f3d..e0c215a16 100644 --- a/tests/test_sql_refsols/pagerank_c4_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_c4_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t26 AS ( +WITH _t13 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t26 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t26 + FROM _t13 ), _s1 AS ( SELECT l_source, @@ -26,164 +26,88 @@ WITH _t26 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t21 AS ( +), _t8 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t27.l_source <> _t27.l_target OR _t27.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, _s2.anything_n, + NOT _t14.l_target IS NULL AND _t14.l_source = _t14.l_target AS dummy_link, + _s2.n_out, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t27 - ON _s2.anything_s_key = _t27.l_source - JOIN _t26 AS _s5 - ON _s5.s_key = _t27.l_target OR _t27.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t21 -), _s8 AS ( - SELECT - COALESCE( - SUM( - IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_n) AS 
anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t AS _t - JOIN _s1 AS _s7 - ON _s7.l_source = _t.s_key - WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t15 AS ( + JOIN _s1 AS _t14 + ON _s2.anything_s_key = _t14.l_source + JOIN _t13 AS _s5 + ON _s5.s_key = _t14.l_target OR _t14.l_target IS NULL +), _t6 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t8.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t28.l_source <> _t28.l_target OR _t28.l_target IS NULL AS INTEGER) * _s8.page_rank - ) AS REAL) / _s8.n_out - ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, - _s8.anything_anything_n, - _s11.s_key - FROM _s8 AS _s8 - JOIN _s1 AS _t28 - ON _s8.anything_s_key = _t28.l_source - JOIN _t26 AS _s11 - ON _s11.s_key = _t28.l_target OR _t28.l_target IS NULL -), _t_2 AS ( - SELECT - page_rank_0, - anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t15 -), _s14 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s13.l_target IS NULL, - _t.anything_anything_n, - CAST(_s13.l_source <> _s13.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_n) AS anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_2 AS _t - JOIN _s1 AS _s13 - ON _s13.l_source = _t.s_key + CAST(_t15.l_source <> _t15.l_target OR _t15.l_target IS NULL AS INTEGER) * _t8.page_rank_0 + ) AS REAL) / _t8.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_114, + _t8.anything_n, + NOT _t15.l_target IS NULL AND _t15.l_source = _t15.l_target AS dummy_link_112, + _t8.n_out, + _s9.s_key + FROM _t8 AS _t8 + JOIN _s1 AS _t15 + ON _t15.l_source = _t8.s_key + JOIN _t13 AS _s9 + ON _s9.s_key = _t15.l_target OR _t15.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t9 AS ( + _t8.dummy_link +), _t4 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / 
_t6.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t29.l_source <> _t29.l_target OR _t29.l_target IS NULL AS INTEGER) * _s14.page_rank - ) AS REAL) / _s14.n_out - ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, - _s14.anything_anything_anything_n, - _s17.s_key - FROM _s14 AS _s14 - JOIN _s1 AS _t29 - ON _s14.anything_s_key = _t29.l_source - JOIN _t26 AS _s17 - ON _s17.s_key = _t29.l_target OR _t29.l_target IS NULL -), _t_3 AS ( - SELECT - page_rank_0, - anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t9 -), _s20 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s19.l_target IS NULL, - _t.anything_anything_anything_n, - CAST(_s19.l_source <> _s19.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_3 AS _t - JOIN _s1 AS _s19 - ON _s19.l_source = _t.s_key + CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _t6.page_rank_0_114 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_124, + _t6.anything_n, + NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link_122, + _t6.n_out, + _s13.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t16 + ON _t16.l_source = _t6.s_key + JOIN _t13 AS _s13 + ON _s13.s_key = _t16.l_target OR _t16.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t3 AS ( + _t6.dummy_link_112 +), _t2 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t4.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t30.l_source <> _t30.l_target OR _t30.l_target IS NULL AS INTEGER) * _s20.page_rank - ) AS REAL) / _s20.n_out - ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, - _s23.s_key - FROM _s20 AS _s20 - JOIN _s1 AS _t30 - ON _s20.anything_s_key = _t30.l_source - JOIN _t26 AS _s23 - ON _s23.s_key = _t30.l_target OR 
_t30.l_target IS NULL -), _t_4 AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t4.page_rank_0_124 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_134, + NOT _t17.l_target IS NULL AND _t17.l_source = _t17.l_target AS dummy_link_132, + _s17.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t17 + ON _t17.l_source = _t4.s_key + JOIN _t13 AS _s17 + ON _s17.s_key = _t17.l_target OR _t17.l_target IS NULL + WHERE + _t4.dummy_link_122 ) SELECT s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t_4 + ROUND(page_rank_0_134, 5) AS page_rank +FROM _t2 WHERE - _w = 1 + dummy_link_132 ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_d1_sqlite.sql b/tests/test_sql_refsols/pagerank_d1_sqlite.sql index 549289485..d7965056f 100644 --- a/tests/test_sql_refsols/pagerank_d1_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_d1_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t8 AS ( +WITH _t7 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t8 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t8 + FROM _t7 ), _s1 AS ( SELECT l_source, @@ -26,33 +26,28 @@ WITH _t8 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t3 AS ( +), _t2 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t9 - ON _s2.anything_s_key = _t9.l_source - JOIN _t8 AS _s5 - ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + JOIN _s1 AS _t8 + ON 
_s2.anything_s_key = _t8.l_source + JOIN _t7 AS _s5 + ON _s5.s_key = _t8.l_target OR _t8.l_target IS NULL ) SELECT s_key AS key, ROUND(page_rank_0, 5) AS page_rank -FROM _t +FROM _t2 WHERE - _w = 1 + dummy_link ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_d5_sqlite.sql b/tests/test_sql_refsols/pagerank_d5_sqlite.sql index 09af3379b..f6f5cb16e 100644 --- a/tests/test_sql_refsols/pagerank_d5_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_d5_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t32 AS ( +WITH _t15 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t32 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t32 + FROM _t15 ), _s1 AS ( SELECT l_source, @@ -26,209 +26,108 @@ WITH _t32 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t27 AS ( +), _t10 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t33.l_source <> _t33.l_target OR _t33.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, _s2.anything_n, + NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link, + _s2.n_out, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t33 - ON _s2.anything_s_key = _t33.l_source - JOIN _t32 AS _s5 - ON _s5.s_key = _t33.l_target OR _t33.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t27 -), _s8 AS ( - SELECT - COALESCE( - SUM( - IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_n) AS anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t AS _t - JOIN _s1 AS _s7 - ON _s7.l_source = _t.s_key - WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t21 AS ( + JOIN _s1 AS _t16 + ON _s2.anything_s_key = _t16.l_source + JOIN _t15 AS _s5 + ON _s5.s_key = 
_t16.l_target OR _t16.l_target IS NULL +), _t8 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t10.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t34.l_source <> _t34.l_target OR _t34.l_target IS NULL AS INTEGER) * _s8.page_rank - ) AS REAL) / _s8.n_out - ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, - _s8.anything_anything_n, - _s11.s_key - FROM _s8 AS _s8 - JOIN _s1 AS _t34 - ON _s8.anything_s_key = _t34.l_source - JOIN _t32 AS _s11 - ON _s11.s_key = _t34.l_target OR _t34.l_target IS NULL -), _t_2 AS ( - SELECT - page_rank_0, - anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t21 -), _s14 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s13.l_target IS NULL, - _t.anything_anything_n, - CAST(_s13.l_source <> _s13.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_n) AS anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_2 AS _t - JOIN _s1 AS _s13 - ON _s13.l_source = _t.s_key + CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t10.page_rank_0 + ) AS REAL) / _t10.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_256, + _t10.anything_n, + NOT _t17.l_target IS NULL AND _t17.l_source = _t17.l_target AS dummy_link_254, + _t10.n_out, + _s9.s_key + FROM _t10 AS _t10 + JOIN _s1 AS _t17 + ON _t10.s_key = _t17.l_source + JOIN _t15 AS _s9 + ON _s9.s_key = _t17.l_target OR _t17.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t15 AS ( + _t10.dummy_link +), _t6 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t8.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t35.l_source <> _t35.l_target OR _t35.l_target IS NULL AS INTEGER) * _s14.page_rank - ) AS REAL) / _s14.n_out - ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, - _s14.anything_anything_anything_n, - _s17.s_key - FROM 
_s14 AS _s14 - JOIN _s1 AS _t35 - ON _s14.anything_s_key = _t35.l_source - JOIN _t32 AS _s17 - ON _s17.s_key = _t35.l_target OR _t35.l_target IS NULL -), _t_3 AS ( - SELECT - page_rank_0, - anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t15 -), _s20 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s19.l_target IS NULL, - _t.anything_anything_anything_n, - CAST(_s19.l_source <> _s19.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_3 AS _t - JOIN _s1 AS _s19 - ON _s19.l_source = _t.s_key + CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _t8.page_rank_0_256 + ) AS REAL) / _t8.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_266, + _t8.anything_n, + NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link_264, + _t8.n_out, + _s13.s_key + FROM _t8 AS _t8 + JOIN _s1 AS _t18 + ON _t18.l_source = _t8.s_key + JOIN _t15 AS _s13 + ON _s13.s_key = _t18.l_target OR _t18.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t9 AS ( + _t8.dummy_link_254 +), _t4 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t6.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t36.l_source <> _t36.l_target OR _t36.l_target IS NULL AS INTEGER) * _s20.page_rank - ) AS REAL) / _s20.n_out - ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, - _s20.anything_anything_anything_anything_n, - _s23.s_key - FROM _s20 AS _s20 - JOIN _s1 AS _t36 - ON _s20.anything_s_key = _t36.l_source - JOIN _t32 AS _s23 - ON _s23.s_key = _t36.l_target OR _t36.l_target IS NULL -), _t_4 AS ( - SELECT - page_rank_0, - anything_anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t9 -), _s26 AS ( - SELECT - COALESCE( - SUM( - 
IIF( - _s25.l_target IS NULL, - _t.anything_anything_anything_anything_n, - CAST(_s25.l_source <> _s25.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_4 AS _t - JOIN _s1 AS _s25 - ON _s25.l_source = _t.s_key + CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t6.page_rank_0_266 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_276, + _t6.anything_n, + NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_274, + _t6.n_out, + _s17.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t19 + ON _t19.l_source = _t6.s_key + JOIN _t15 AS _s17 + ON _s17.s_key = _t19.l_target OR _t19.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t3 AS ( + _t6.dummy_link_264 +), _t2 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s26.anything_anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t4.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t37.l_source <> _t37.l_target OR _t37.l_target IS NULL AS INTEGER) * _s26.page_rank - ) AS REAL) / _s26.n_out - ) OVER (PARTITION BY _s29.s_key) AS page_rank_0, - _s29.s_key - FROM _s26 AS _s26 - JOIN _s1 AS _t37 - ON _s26.anything_s_key = _t37.l_source - JOIN _t32 AS _s29 - ON _s29.s_key = _t37.l_target OR _t37.l_target IS NULL -), _t_5 AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t4.page_rank_0_276 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_286, + NOT _t20.l_target IS NULL AND _t20.l_source = _t20.l_target AS dummy_link_284, + _s21.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t20 + ON _t20.l_source = _t4.s_key + JOIN _t15 AS _s21 + ON _s21.s_key = _t20.l_target OR _t20.l_target IS NULL + WHERE + _t4.dummy_link_274 
) SELECT s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t_5 + ROUND(page_rank_0_286, 5) AS page_rank +FROM _t2 WHERE - _w = 1 + dummy_link_284 ORDER BY s_key From 44fdd3336fc59c664bb1eb0167474b36f193b872 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 16:16:12 -0400 Subject: [PATCH 032/143] [RUN CI] From b5d90f25e8d78685626efbc0493cec8136511cbf Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 16:17:02 -0400 Subject: [PATCH 033/143] [RUN CI] From 0128758695d8aaca07b70215a624232916ba55d5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 17 Jul 2025 12:14:54 -0400 Subject: [PATCH 034/143] Added tests e/f, deleted relational/sql tests for graphs other than a/c --- tests/conftest.py | 2 + tests/test_pipeline_pagerank.py | 50 ++++++- tests/test_plan_refsols/pagerank_b0.txt | 7 - tests/test_plan_refsols/pagerank_b1.txt | 16 --- tests/test_plan_refsols/pagerank_b3.txt | 30 ---- tests/test_plan_refsols/pagerank_d1.txt | 16 --- tests/test_plan_refsols/pagerank_d5.txt | 44 ------ tests/test_sql_refsols/pagerank_b0_sqlite.sql | 16 --- tests/test_sql_refsols/pagerank_b1_sqlite.sql | 53 ------- tests/test_sql_refsols/pagerank_b3_sqlite.sql | 93 ------------ tests/test_sql_refsols/pagerank_d1_sqlite.sql | 53 ------- tests/test_sql_refsols/pagerank_d5_sqlite.sql | 133 ------------------ 12 files changed, 49 insertions(+), 464 deletions(-) delete mode 100644 tests/test_plan_refsols/pagerank_b0.txt delete mode 100644 tests/test_plan_refsols/pagerank_b1.txt delete mode 100644 tests/test_plan_refsols/pagerank_b3.txt delete mode 100644 tests/test_plan_refsols/pagerank_d1.txt delete mode 100644 tests/test_plan_refsols/pagerank_d5.txt delete mode 100644 tests/test_sql_refsols/pagerank_b0_sqlite.sql delete mode 100644 tests/test_sql_refsols/pagerank_b1_sqlite.sql delete mode 100644 tests/test_sql_refsols/pagerank_b3_sqlite.sql delete mode 100644 tests/test_sql_refsols/pagerank_d1_sqlite.sql delete mode 100644 
tests/test_sql_refsols/pagerank_d5_sqlite.sql diff --git a/tests/conftest.py b/tests/conftest.py index ec28ed893..1bbe51c09 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -513,6 +513,8 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: (15, 2), ], ), + ("PAGERANK_E", 5, [(i, j) for i in range(1, 6) for j in range(1, 6) if i != j]), + ("PAGERANK_F", 100, []), ] # Setup each of the the pagerank databases using the configurations. diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index c8a846e5f..f55c53eec 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -219,6 +219,38 @@ ), id="pagerank_d5", ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_E", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4, 5], + "page_rank": [0.2] * 5, + } + ), + "pagerank_e1", + order_sensitive=True, + args=[1], + ), + id="pagerank_e1", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_F", + lambda: pd.DataFrame( + { + "key": list(range(1, 101)), + "page_rank": [0.01] * 100, + } + ), + "pagerank_f2", + order_sensitive=True, + args=[2], + ), + id="pagerank_f2", + ), ], ) def pagerank_pipeline_test_data(request) -> PyDoughPandasTest: @@ -238,7 +270,13 @@ def test_pipeline_until_relational_pagerank( ) -> None: """ Verifies the generated relational plans for the pagerank tests. + Only runs the tests with the `PAGERANK_A`/`PAGERANK_C` graphs, + since the others are essentially duplicates of the plans. 
""" + if pagerank_pipeline_test_data.graph_name not in ("PAGERANK_A", "PAGERANK_C"): + pytest.skip( + "Skipping relational plan test for graphs other than PAGERANK_A or PAGERANK_C" + ) file_path: str = get_plan_test_filename(pagerank_pipeline_test_data.test_name) pagerank_pipeline_test_data.run_relational_test( get_pagerank_graph, file_path, update_tests @@ -253,9 +291,14 @@ def test_pipeline_until_sql_pagerank( update_tests: bool, ) -> None: """ - Verifies the generated SQL for the pagerank tests. The outputs were - generated using this website: https://pagerank-visualizer.netlify.app/. + Verifies the generated SQL for the pagerank tests. Only runs the tests with + the `PAGERANK_A`/`PAGE_RANK_C` graphs, since the others are essentially + duplicates of the generated SQL. """ + if pagerank_pipeline_test_data.graph_name not in ("PAGERANK_A", "PAGERANK_C"): + pytest.skip( + "Skipping sql query test for graphs other than PAGERANK_A or PAGERANK_C" + ) ctx: DatabaseContext = sqlite_pagerank_db_contexts[ pagerank_pipeline_test_data.graph_name ] @@ -274,7 +317,8 @@ def test_pipeline_e2e_pagerank( sqlite_pagerank_db_contexts: dict[str, DatabaseContext], ): """ - Verifies the final output answer for the pagerank tests. + Verifies the final output answer for the pagerank tests. The outputs were + generated using this website: https://pagerank-visualizer.netlify.app/. 
""" pagerank_pipeline_test_data.run_e2e_test( get_pagerank_graph, diff --git a/tests/test_plan_refsols/pagerank_b0.txt b/tests/test_plan_refsols/pagerank_b0.txt deleted file mode 100644 index 89355a8da..000000000 --- a/tests/test_plan_refsols/pagerank_b0.txt +++ /dev/null @@ -1,7 +0,0 @@ -ROOT(columns=[('key', anything_s_key), ('page_rank', page_rank_0)], orderings=[(anything_s_key):asc_first]) - PROJECT(columns={'anything_s_key': anything_s_key, 'page_rank_0': ROUND(1.0:numeric / anything_agg_2, 5:numeric)}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_agg_2': ANYTHING(agg_2), 'anything_s_key': ANYTHING(s_key)}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_2': t0.agg_2, 's_key': t0.s_key}) - PROJECT(columns={'agg_2': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source}) diff --git a/tests/test_plan_refsols/pagerank_b1.txt b/tests/test_plan_refsols/pagerank_b1.txt deleted file mode 100644 index b3053e78b..000000000 --- a/tests/test_plan_refsols/pagerank_b1.txt +++ /dev/null @@ -1,16 +0,0 @@ -ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt deleted file mode 100644 index d78322a4d..000000000 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ /dev/null @@ -1,30 +0,0 @@ -ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0_58, 5:numeric), 's_key': s_key}) - FILTER(condition=dummy_link_56, columns={'page_rank_0_58': page_rank_0_58, 's_key': s_key}) - PROJECT(columns={'dummy_link_56': dummy_link_56, 'page_rank_0_58': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_57 * page_rank_0_48 / 
n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t0.consider_link_57, 'dummy_link_56': t0.dummy_link_56, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t1.consider_link_57, 'dummy_link_56': t1.dummy_link_56, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48}) - FILTER(condition=dummy_link_46, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_48': page_rank_0_48, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_46': dummy_link_46, 'n_out': n_out, 'page_rank_0_48': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_47 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t0.consider_link_47, 'dummy_link_46': t0.dummy_link_46, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t1.consider_link_47, 'dummy_link_46': t1.dummy_link_46, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - PROJECT(columns={'consider_link_47': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_46': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - 
PROJECT(columns={'consider_link_57': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_56': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d1.txt b/tests/test_plan_refsols/pagerank_d1.txt deleted file mode 100644 index b3053e78b..000000000 --- a/tests/test_plan_refsols/pagerank_d1.txt +++ /dev/null @@ -1,16 +0,0 @@ -ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key 
== t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt deleted file mode 100644 index fb3b3c239..000000000 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ /dev/null @@ -1,44 +0,0 @@ -ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0_286, 5:numeric), 's_key': s_key}) - FILTER(condition=dummy_link_284, columns={'page_rank_0_286': page_rank_0_286, 's_key': s_key}) - PROJECT(columns={'dummy_link_284': dummy_link_284, 'page_rank_0_286': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_285 * page_rank_0_276 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t0.consider_link_285, 'dummy_link_284': t0.dummy_link_284, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t1.consider_link_285, 'dummy_link_284': t1.dummy_link_284, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276}) - 
FILTER(condition=dummy_link_274, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_276': page_rank_0_276, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_274': dummy_link_274, 'n_out': n_out, 'page_rank_0_276': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_275 * page_rank_0_266 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t0.consider_link_275, 'dummy_link_274': t0.dummy_link_274, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t1.consider_link_275, 'dummy_link_274': t1.dummy_link_274, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266}) - FILTER(condition=dummy_link_264, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_266': page_rank_0_266, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_264': dummy_link_264, 'n_out': n_out, 'page_rank_0_266': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_265 * page_rank_0_256 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t0.consider_link_265, 'dummy_link_264': t0.dummy_link_264, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t1.consider_link_265, 'dummy_link_264': t1.dummy_link_264, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256}) - 
FILTER(condition=dummy_link_254, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_256': page_rank_0_256, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_254': dummy_link_254, 'n_out': n_out, 'page_rank_0_256': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_255 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t0.consider_link_255, 'dummy_link_254': t0.dummy_link_254, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t1.consider_link_255, 'dummy_link_254': t1.dummy_link_254, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': 
DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - PROJECT(columns={'consider_link_255': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_254': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - PROJECT(columns={'consider_link_265': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_264': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - PROJECT(columns={'consider_link_275': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_274': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, 
columns={'s_key': s_key}) - PROJECT(columns={'consider_link_285': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_284': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_sql_refsols/pagerank_b0_sqlite.sql b/tests/test_sql_refsols/pagerank_b0_sqlite.sql deleted file mode 100644 index 4fe8f22be..000000000 --- a/tests/test_sql_refsols/pagerank_b0_sqlite.sql +++ /dev/null @@ -1,16 +0,0 @@ -WITH _s0 AS ( - SELECT - COUNT(*) OVER () AS agg_2, - s_key - FROM main.sites -) -SELECT - MAX(_s0.s_key) AS key, - ROUND(CAST(1.0 AS REAL) / MAX(_s0.agg_2), 5) AS page_rank -FROM _s0 AS _s0 -JOIN main.links AS links - ON _s0.s_key = links.l_source -GROUP BY - _s0.s_key -ORDER BY - MAX(_s0.s_key) diff --git a/tests/test_sql_refsols/pagerank_b1_sqlite.sql b/tests/test_sql_refsols/pagerank_b1_sqlite.sql deleted file mode 100644 index d7965056f..000000000 --- a/tests/test_sql_refsols/pagerank_b1_sqlite.sql +++ /dev/null @@ -1,53 +0,0 @@ -WITH _t7 AS ( - SELECT - s_key - FROM main.sites -), _s0 AS ( - SELECT - COUNT(*) OVER () AS n, - s_key - FROM _t7 -), _s1 AS ( - SELECT - l_source, - l_target - FROM main.links -), _s2 AS ( - SELECT - COALESCE( - SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), - 0 - ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, - MAX(_s0.s_key) AS anything_s_key - FROM _s0 AS _s0 - JOIN _s1 AS _s1 - ON _s0.s_key = _s1.l_source - GROUP BY - _s0.s_key -), _t2 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank - ) AS REAL) / _s2.n_out - ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, - 
_s5.s_key - FROM _s2 AS _s2 - JOIN _s1 AS _t8 - ON _s2.anything_s_key = _t8.l_source - JOIN _t7 AS _s5 - ON _s5.s_key = _t8.l_target OR _t8.l_target IS NULL -) -SELECT - s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t2 -WHERE - dummy_link -ORDER BY - s_key diff --git a/tests/test_sql_refsols/pagerank_b3_sqlite.sql b/tests/test_sql_refsols/pagerank_b3_sqlite.sql deleted file mode 100644 index 40404439e..000000000 --- a/tests/test_sql_refsols/pagerank_b3_sqlite.sql +++ /dev/null @@ -1,93 +0,0 @@ -WITH _t11 AS ( - SELECT - s_key - FROM main.sites -), _s0 AS ( - SELECT - COUNT(*) OVER () AS n, - s_key - FROM _t11 -), _s1 AS ( - SELECT - l_source, - l_target - FROM main.links -), _s2 AS ( - SELECT - COALESCE( - SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), - 0 - ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, - MAX(_s0.s_key) AS anything_s_key - FROM _s0 AS _s0 - JOIN _s1 AS _s1 - ON _s0.s_key = _s1.l_source - GROUP BY - _s0.s_key -), _t6 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t12.l_source <> _t12.l_target OR _t12.l_target IS NULL AS INTEGER) * _s2.page_rank - ) AS REAL) / _s2.n_out - ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, - NOT _t12.l_target IS NULL AND _t12.l_source = _t12.l_target AS dummy_link, - _s2.n_out, - _s5.s_key - FROM _s2 AS _s2 - JOIN _s1 AS _t12 - ON _s2.anything_s_key = _t12.l_source - JOIN _t11 AS _s5 - ON _s5.s_key = _t12.l_target OR _t12.l_target IS NULL -), _t4 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t13.l_source <> _t13.l_target OR _t13.l_target IS NULL AS INTEGER) * _t6.page_rank_0 - ) AS REAL) / _t6.n_out - ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_48, - _t6.anything_n, - NOT _t13.l_target IS NULL AND _t13.l_source = _t13.l_target AS dummy_link_46, - _t6.n_out, - _s9.s_key - FROM _t6 AS _t6 - JOIN 
_s1 AS _t13 - ON _t13.l_source = _t6.s_key - JOIN _t11 AS _s9 - ON _s9.s_key = _t13.l_target OR _t13.l_target IS NULL - WHERE - _t6.dummy_link -), _t2 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _t4.page_rank_0_48 - ) AS REAL) / _t4.n_out - ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_58, - NOT _t14.l_target IS NULL AND _t14.l_source = _t14.l_target AS dummy_link_56, - _s13.s_key - FROM _t4 AS _t4 - JOIN _s1 AS _t14 - ON _t14.l_source = _t4.s_key - JOIN _t11 AS _s13 - ON _s13.s_key = _t14.l_target OR _t14.l_target IS NULL - WHERE - _t4.dummy_link_46 -) -SELECT - s_key AS key, - ROUND(page_rank_0_58, 5) AS page_rank -FROM _t2 -WHERE - dummy_link_56 -ORDER BY - s_key diff --git a/tests/test_sql_refsols/pagerank_d1_sqlite.sql b/tests/test_sql_refsols/pagerank_d1_sqlite.sql deleted file mode 100644 index d7965056f..000000000 --- a/tests/test_sql_refsols/pagerank_d1_sqlite.sql +++ /dev/null @@ -1,53 +0,0 @@ -WITH _t7 AS ( - SELECT - s_key - FROM main.sites -), _s0 AS ( - SELECT - COUNT(*) OVER () AS n, - s_key - FROM _t7 -), _s1 AS ( - SELECT - l_source, - l_target - FROM main.links -), _s2 AS ( - SELECT - COALESCE( - SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), - 0 - ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, - MAX(_s0.s_key) AS anything_s_key - FROM _s0 AS _s0 - JOIN _s1 AS _s1 - ON _s0.s_key = _s1.l_source - GROUP BY - _s0.s_key -), _t2 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank - ) AS REAL) / _s2.n_out - ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, - _s5.s_key - FROM _s2 AS _s2 - JOIN _s1 AS _t8 - ON _s2.anything_s_key = _t8.l_source - 
JOIN _t7 AS _s5 - ON _s5.s_key = _t8.l_target OR _t8.l_target IS NULL -) -SELECT - s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t2 -WHERE - dummy_link -ORDER BY - s_key diff --git a/tests/test_sql_refsols/pagerank_d5_sqlite.sql b/tests/test_sql_refsols/pagerank_d5_sqlite.sql deleted file mode 100644 index f6f5cb16e..000000000 --- a/tests/test_sql_refsols/pagerank_d5_sqlite.sql +++ /dev/null @@ -1,133 +0,0 @@ -WITH _t15 AS ( - SELECT - s_key - FROM main.sites -), _s0 AS ( - SELECT - COUNT(*) OVER () AS n, - s_key - FROM _t15 -), _s1 AS ( - SELECT - l_source, - l_target - FROM main.links -), _s2 AS ( - SELECT - COALESCE( - SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), - 0 - ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, - MAX(_s0.s_key) AS anything_s_key - FROM _s0 AS _s0 - JOIN _s1 AS _s1 - ON _s0.s_key = _s1.l_source - GROUP BY - _s0.s_key -), _t10 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s2.page_rank - ) AS REAL) / _s2.n_out - ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, - NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link, - _s2.n_out, - _s5.s_key - FROM _s2 AS _s2 - JOIN _s1 AS _t16 - ON _s2.anything_s_key = _t16.l_source - JOIN _t15 AS _s5 - ON _s5.s_key = _t16.l_target OR _t16.l_target IS NULL -), _t8 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t10.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t10.page_rank_0 - ) AS REAL) / _t10.n_out - ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_256, - _t10.anything_n, - NOT _t17.l_target IS NULL AND _t17.l_source = _t17.l_target AS dummy_link_254, - _t10.n_out, - _s9.s_key - FROM _t10 AS _t10 - JOIN _s1 AS _t17 - ON _t10.s_key = _t17.l_source - JOIN _t15 AS _s9 - ON 
_s9.s_key = _t17.l_target OR _t17.l_target IS NULL - WHERE - _t10.dummy_link -), _t6 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t8.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _t8.page_rank_0_256 - ) AS REAL) / _t8.n_out - ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_266, - _t8.anything_n, - NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link_264, - _t8.n_out, - _s13.s_key - FROM _t8 AS _t8 - JOIN _s1 AS _t18 - ON _t18.l_source = _t8.s_key - JOIN _t15 AS _s13 - ON _s13.s_key = _t18.l_target OR _t18.l_target IS NULL - WHERE - _t8.dummy_link_254 -), _t4 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t6.page_rank_0_266 - ) AS REAL) / _t6.n_out - ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_276, - _t6.anything_n, - NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_274, - _t6.n_out, - _s17.s_key - FROM _t6 AS _t6 - JOIN _s1 AS _t19 - ON _t19.l_source = _t6.s_key - JOIN _t15 AS _s17 - ON _s17.s_key = _t19.l_target OR _t19.l_target IS NULL - WHERE - _t6.dummy_link_264 -), _t2 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t4.page_rank_0_276 - ) AS REAL) / _t4.n_out - ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_286, - NOT _t20.l_target IS NULL AND _t20.l_source = _t20.l_target AS dummy_link_284, - _s21.s_key - FROM _t4 AS _t4 - JOIN _s1 AS _t20 - ON _t20.l_source = _t4.s_key - JOIN _t15 AS _s21 - ON _s21.s_key = _t20.l_target OR _t20.l_target IS NULL - WHERE - _t4.dummy_link_274 -) -SELECT - s_key AS key, - ROUND(page_rank_0_286, 5) AS page_rank -FROM _t2 -WHERE - dummy_link_284 -ORDER BY - s_key From c9a5fe1f3c0df6b710c86c0e81734d5da588ce1a Mon Sep 17 00:00:00 2001 From: 
knassre-bodo Date: Thu, 17 Jul 2025 12:19:26 -0400 Subject: [PATCH 035/143] Adjusted how the skips are handled --- tests/test_pipeline_pagerank.py | 24 ++-- tests/test_plan_refsols/pagerank_b3.txt | 30 ++++ tests/test_plan_refsols/pagerank_d5.txt | 44 ++++++ tests/test_sql_refsols/pagerank_b3_sqlite.sql | 93 ++++++++++++ tests/test_sql_refsols/pagerank_d5_sqlite.sql | 133 ++++++++++++++++++ tests/testing_utilities.py | 22 +++ 6 files changed, 333 insertions(+), 13 deletions(-) create mode 100644 tests/test_plan_refsols/pagerank_b3.txt create mode 100644 tests/test_plan_refsols/pagerank_d5.txt create mode 100644 tests/test_sql_refsols/pagerank_b3_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_d5_sqlite.sql diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index f55c53eec..56c948395 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -91,6 +91,8 @@ } ), "pagerank_b0", + skip_relational=True, + skip_sql=True, order_sensitive=True, args=[0], ), @@ -107,6 +109,8 @@ } ), "pagerank_b1", + skip_relational=True, + skip_sql=True, order_sensitive=True, args=[1], ), @@ -181,6 +185,8 @@ } ), "pagerank_d1", + skip_relational=True, + skip_sql=True, order_sensitive=True, args=[1], ), @@ -230,6 +236,8 @@ } ), "pagerank_e1", + skip_relational=True, + skip_sql=True, order_sensitive=True, args=[1], ), @@ -246,6 +254,8 @@ } ), "pagerank_f2", + skip_relational=True, + skip_sql=True, order_sensitive=True, args=[2], ), @@ -270,13 +280,7 @@ def test_pipeline_until_relational_pagerank( ) -> None: """ Verifies the generated relational plans for the pagerank tests. - Only runs the tests with the `PAGERANK_A`/`PAGERANK_C` graphs, - since the others are essentially duplicates of the plans. 
""" - if pagerank_pipeline_test_data.graph_name not in ("PAGERANK_A", "PAGERANK_C"): - pytest.skip( - "Skipping relational plan test for graphs other than PAGERANK_A or PAGERANK_C" - ) file_path: str = get_plan_test_filename(pagerank_pipeline_test_data.test_name) pagerank_pipeline_test_data.run_relational_test( get_pagerank_graph, file_path, update_tests @@ -291,14 +295,8 @@ def test_pipeline_until_sql_pagerank( update_tests: bool, ) -> None: """ - Verifies the generated SQL for the pagerank tests. Only runs the tests with - the `PAGERANK_A`/`PAGE_RANK_C` graphs, since the others are essentially - duplicates of the generated SQL. + Verifies the generated SQL for the pagerank tests. """ - if pagerank_pipeline_test_data.graph_name not in ("PAGERANK_A", "PAGERANK_C"): - pytest.skip( - "Skipping sql query test for graphs other than PAGERANK_A or PAGERANK_C" - ) ctx: DatabaseContext = sqlite_pagerank_db_contexts[ pagerank_pipeline_test_data.graph_name ] diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt new file mode 100644 index 000000000..d78322a4d --- /dev/null +++ b/tests/test_plan_refsols/pagerank_b3.txt @@ -0,0 +1,30 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_58, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_56, columns={'page_rank_0_58': page_rank_0_58, 's_key': s_key}) + PROJECT(columns={'dummy_link_56': dummy_link_56, 'page_rank_0_58': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_57 * page_rank_0_48 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t0.consider_link_57, 'dummy_link_56': t0.dummy_link_56, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key}) + 
JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t1.consider_link_57, 'dummy_link_56': t1.dummy_link_56, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48}) + FILTER(condition=dummy_link_46, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_48': page_rank_0_48, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_46': dummy_link_46, 'n_out': n_out, 'page_rank_0_48': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_47 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t0.consider_link_47, 'dummy_link_46': t0.dummy_link_46, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t1.consider_link_47, 'dummy_link_46': t1.dummy_link_46, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_47': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_46': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_57': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_56': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt 
b/tests/test_plan_refsols/pagerank_d5.txt new file mode 100644 index 000000000..fb3b3c239 --- /dev/null +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -0,0 +1,44 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_286, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_284, columns={'page_rank_0_286': page_rank_0_286, 's_key': s_key}) + PROJECT(columns={'dummy_link_284': dummy_link_284, 'page_rank_0_286': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_285 * page_rank_0_276 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t0.consider_link_285, 'dummy_link_284': t0.dummy_link_284, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t1.consider_link_285, 'dummy_link_284': t1.dummy_link_284, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276}) + FILTER(condition=dummy_link_274, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_276': page_rank_0_276, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_274': dummy_link_274, 'n_out': n_out, 'page_rank_0_276': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_275 * page_rank_0_266 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t0.consider_link_275, 'dummy_link_274': t0.dummy_link_274, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key}) + 
JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t1.consider_link_275, 'dummy_link_274': t1.dummy_link_274, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266}) + FILTER(condition=dummy_link_264, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_266': page_rank_0_266, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_264': dummy_link_264, 'n_out': n_out, 'page_rank_0_266': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_265 * page_rank_0_256 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t0.consider_link_265, 'dummy_link_264': t0.dummy_link_264, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t1.consider_link_265, 'dummy_link_264': t1.dummy_link_264, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256}) + FILTER(condition=dummy_link_254, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_256': page_rank_0_256, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_254': dummy_link_254, 'n_out': n_out, 'page_rank_0_256': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_255 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t0.consider_link_255, 'dummy_link_254': t0.dummy_link_254, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + 
JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t1.consider_link_255, 'dummy_link_254': t1.dummy_link_254, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + 
PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_255': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_254': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_265': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_264': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_275': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_274': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_285': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_284': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_sql_refsols/pagerank_b3_sqlite.sql b/tests/test_sql_refsols/pagerank_b3_sqlite.sql new file mode 100644 index 000000000..40404439e --- /dev/null +++ b/tests/test_sql_refsols/pagerank_b3_sqlite.sql @@ -0,0 +1,93 @@ +WITH _t11 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t11 +), _s1 AS ( + SELECT + 
l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t6 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t12.l_source <> _t12.l_target OR _t12.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + NOT _t12.l_target IS NULL AND _t12.l_source = _t12.l_target AS dummy_link, + _s2.n_out, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t12 + ON _s2.anything_s_key = _t12.l_source + JOIN _t11 AS _s5 + ON _s5.s_key = _t12.l_target OR _t12.l_target IS NULL +), _t4 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t6.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t13.l_source <> _t13.l_target OR _t13.l_target IS NULL AS INTEGER) * _t6.page_rank_0 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_48, + _t6.anything_n, + NOT _t13.l_target IS NULL AND _t13.l_source = _t13.l_target AS dummy_link_46, + _t6.n_out, + _s9.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t13 + ON _t13.l_source = _t6.s_key + JOIN _t11 AS _s9 + ON _s9.s_key = _t13.l_target OR _t13.l_target IS NULL + WHERE + _t6.dummy_link +), _t2 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t4.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _t4.page_rank_0_48 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_58, + NOT _t14.l_target IS NULL AND _t14.l_source = _t14.l_target AS dummy_link_56, + _s13.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t14 + ON _t14.l_source = _t4.s_key + JOIN _t11 AS _s13 + ON _s13.s_key = _t14.l_target OR 
_t14.l_target IS NULL + WHERE + _t4.dummy_link_46 +) +SELECT + s_key AS key, + ROUND(page_rank_0_58, 5) AS page_rank +FROM _t2 +WHERE + dummy_link_56 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_d5_sqlite.sql b/tests/test_sql_refsols/pagerank_d5_sqlite.sql new file mode 100644 index 000000000..f6f5cb16e --- /dev/null +++ b/tests/test_sql_refsols/pagerank_d5_sqlite.sql @@ -0,0 +1,133 @@ +WITH _t15 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t15 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t10 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link, + _s2.n_out, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t16 + ON _s2.anything_s_key = _t16.l_source + JOIN _t15 AS _s5 + ON _s5.s_key = _t16.l_target OR _t16.l_target IS NULL +), _t8 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t10.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t10.page_rank_0 + ) AS REAL) / _t10.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_256, + _t10.anything_n, + NOT _t17.l_target IS NULL AND _t17.l_source = _t17.l_target AS dummy_link_254, + _t10.n_out, + _s9.s_key + FROM _t10 AS _t10 + JOIN _s1 AS _t17 + ON _t10.s_key = _t17.l_source + JOIN _t15 AS _s9 + ON _s9.s_key = _t17.l_target 
OR _t17.l_target IS NULL + WHERE + _t10.dummy_link +), _t6 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t8.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _t8.page_rank_0_256 + ) AS REAL) / _t8.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_266, + _t8.anything_n, + NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link_264, + _t8.n_out, + _s13.s_key + FROM _t8 AS _t8 + JOIN _s1 AS _t18 + ON _t18.l_source = _t8.s_key + JOIN _t15 AS _s13 + ON _s13.s_key = _t18.l_target OR _t18.l_target IS NULL + WHERE + _t8.dummy_link_254 +), _t4 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t6.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t6.page_rank_0_266 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_276, + _t6.anything_n, + NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_274, + _t6.n_out, + _s17.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t19 + ON _t19.l_source = _t6.s_key + JOIN _t15 AS _s17 + ON _s17.s_key = _t19.l_target OR _t19.l_target IS NULL + WHERE + _t6.dummy_link_264 +), _t2 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t4.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t4.page_rank_0_276 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_286, + NOT _t20.l_target IS NULL AND _t20.l_source = _t20.l_target AS dummy_link_284, + _s21.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t20 + ON _t20.l_source = _t4.s_key + JOIN _t15 AS _s21 + ON _s21.s_key = _t20.l_target OR _t20.l_target IS NULL + WHERE + _t4.dummy_link_274 +) +SELECT + s_key AS key, + ROUND(page_rank_0_286, 5) AS page_rank +FROM _t2 +WHERE + dummy_link_284 +ORDER BY + s_key diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index d95d35562..d2c01690a 100644 --- 
a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1043,6 +1043,10 @@ class PyDoughPandasTest: in the output and just use the same column names as in the reference solution. - `args` (optional): additional arguments to pass to the PyDough function. + - `skip_relational`: (optional): if True, does not run the test as part of + relational plan testing. Default is False. + - `skip_sql`: (optional): if True, does not run the test as part of SQL + testing. Default is False. """ pydough_function: Callable[..., UnqualifiedNode] @@ -1090,6 +1094,16 @@ class PyDoughPandasTest: executing it. If None, no additional arguments are passed. """ + skip_relational: bool = False + """ + If True, does not run the test as part of relational plan testing. + """ + + skip_sql: bool = False + """ + If True, does not run the test as part of SQL testing. + """ + def run_relational_test( self, fetcher: graph_fetcher, @@ -1111,6 +1125,10 @@ def run_relational_test( against the expected relational plan text in the file. `config`: The PyDough configuration to use for the test, if any. """ + # Skip if indicated. + if self.skip_relational: + pytest.skip(f"Skipping relational plan test for {self.test_name!r}") + # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) root: UnqualifiedNode = transform_and_exec_pydough( @@ -1165,6 +1183,10 @@ def run_sql_test( to use when generating the SQL test. `config`: The PyDough configuration to use for the test, if any. """ + # Skip if indicated. 
+ if self.skip_sql: + pytest.skip(f"Skipping SQL text test for {self.test_name!r}") + # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) root: UnqualifiedNode = transform_and_exec_pydough( From d6ed6c65d8c66153492e20595651941b0edd5af8 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 17 Jul 2025 12:59:30 -0400 Subject: [PATCH 036/143] Adding larger graph & dense graph tests [RUN CI] --- tests/conftest.py | 20 +++++++ tests/test_pipeline_pagerank.py | 95 +++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 1bbe51c09..21b362b2d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -515,6 +515,26 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: ), ("PAGERANK_E", 5, [(i, j) for i in range(1, 6) for j in range(1, 6) if i != j]), ("PAGERANK_F", 100, []), + ( + "PAGERANK_G", + 1000, + [ + (j + 1, i + 1) + for i in range(1000) + for j in range(i + 1, 1000) + if str(i) in str(j) + ], + ), + ( + "PAGERANK_H", + 50, + [ + (i, j) + for i in range(1, 51) + for j in range(1, 51) + if i != j and (i < j or i % j == 0) + ], + ), ] # Setup each of the the pagerank databases using the configurations. 
diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index 56c948395..3385d7199 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -261,6 +261,101 @@ ), id="pagerank_f2", ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_G", + lambda: pd.DataFrame( + { + "key": list(range(1, 1001)), + "page_rank": [0.02471] + + [0.03806] * 9 + + [ + 0.00225 + if i % 10 == 0 + else (0.00234 if len(set(str(i))) == 1 else 0.00205) + for i in range(10, 100) + ] + + [0.00049] * 900, + } + ), + "pagerank_g5", + skip_relational=True, + skip_sql=True, + order_sensitive=True, + args=[5], + ), + id="pagerank_g5", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_H", + lambda: pd.DataFrame( + { + "key": list(range(1, 51)), + "page_rank": [ + 0.07399, + 0.03496, + 0.0178, + 0.01513, + 0.02271, + 0.01277, + 0.02365, + 0.01234, + 0.01065, + 0.01941, + 0.01044, + 0.01207, + 0.00915, + 0.00958, + 0.01116, + 0.01242, + 0.00911, + 0.00929, + 0.00993, + 0.01019, + 0.01077, + 0.01195, + 0.01426, + 0.01403, + 0.02148, + 0.01121, + 0.01154, + 0.0119, + 0.01227, + 0.01273, + 0.01313, + 0.01369, + 0.0142, + 0.01482, + 0.01551, + 0.01628, + 0.01694, + 0.01804, + 0.01914, + 0.0204, + 0.02152, + 0.02352, + 0.02499, + 0.02791, + 0.03029, + 0.0331, + 0.03748, + 0.04604, + 0.04977, + 0.06437, + ], + } + ), + "pagerank_h3", + skip_relational=True, + skip_sql=True, + order_sensitive=True, + args=[3], + ), + id="pagerank_h3", + ), ], ) def pagerank_pipeline_test_data(request) -> PyDoughPandasTest: From ebf5339193f93d883c20b8f1a2783eca95cf3cb4 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 17 Jul 2025 13:02:26 -0400 Subject: [PATCH 037/143] Changing test h to be a higher number of iterations [RUN CI] --- tests/test_pipeline_pagerank.py | 108 +++++----- tests/test_plan_refsols/pagerank_h8.txt | 65 ++++++ tests/test_sql_refsols/pagerank_h8_sqlite.sql | 193 ++++++++++++++++++ 3 files changed, 311 insertions(+), 55 
deletions(-) create mode 100644 tests/test_plan_refsols/pagerank_h8.txt create mode 100644 tests/test_sql_refsols/pagerank_h8_sqlite.sql diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index 3385d7199..8d37255a6 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -295,66 +295,64 @@ { "key": list(range(1, 51)), "page_rank": [ - 0.07399, - 0.03496, - 0.0178, - 0.01513, - 0.02271, - 0.01277, - 0.02365, - 0.01234, - 0.01065, - 0.01941, - 0.01044, - 0.01207, - 0.00915, - 0.00958, - 0.01116, - 0.01242, - 0.00911, - 0.00929, - 0.00993, - 0.01019, - 0.01077, - 0.01195, - 0.01426, - 0.01403, - 0.02148, - 0.01121, - 0.01154, - 0.0119, - 0.01227, - 0.01273, - 0.01313, - 0.01369, - 0.0142, - 0.01482, - 0.01551, - 0.01628, - 0.01694, - 0.01804, - 0.01914, - 0.0204, - 0.02152, - 0.02352, - 0.02499, - 0.02791, - 0.03029, - 0.0331, - 0.03748, - 0.04604, - 0.04977, - 0.06437, + 0.07097, + 0.03388, + 0.01732, + 0.01475, + 0.02226, + 0.01251, + 0.02268, + 0.01214, + 0.01054, + 0.01912, + 0.01041, + 0.01197, + 0.00931, + 0.0097, + 0.01115, + 0.01239, + 0.00938, + 0.00957, + 0.0102, + 0.01046, + 0.01102, + 0.01212, + 0.01427, + 0.01417, + 0.02157, + 0.01176, + 0.01213, + 0.01252, + 0.01292, + 0.01341, + 0.01384, + 0.01442, + 0.01496, + 0.01559, + 0.01629, + 0.01706, + 0.01772, + 0.0188, + 0.01986, + 0.02107, + 0.02212, + 0.024, + 0.02537, + 0.02806, + 0.03024, + 0.03281, + 0.0368, + 0.04463, + 0.04808, + 0.06171, ], } ), - "pagerank_h3", - skip_relational=True, - skip_sql=True, + "pagerank_h8", order_sensitive=True, - args=[3], + args=[8], ), - id="pagerank_h3", + id="pagerank_h8", ), ], ) diff --git a/tests/test_plan_refsols/pagerank_h8.txt b/tests/test_plan_refsols/pagerank_h8.txt new file mode 100644 index 000000000..045977af7 --- /dev/null +++ b/tests/test_plan_refsols/pagerank_h8.txt @@ -0,0 +1,65 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + 
PROJECT(columns={'page_rank_1': ROUND(page_rank_0_2414, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_2412, columns={'page_rank_0_2414': page_rank_0_2414, 's_key': s_key}) + PROJECT(columns={'dummy_link_2412': dummy_link_2412, 'page_rank_0_2414': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2413 * page_rank_0_2404 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2413': t0.consider_link_2413, 'dummy_link_2412': t0.dummy_link_2412, 'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2413': t1.consider_link_2413, 'dummy_link_2412': t1.dummy_link_2412, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404}) + FILTER(condition=dummy_link_2402, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2404': page_rank_0_2404, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2402': dummy_link_2402, 'n_out': n_out, 'page_rank_0_2404': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2403 * page_rank_0_2394 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2403': t0.consider_link_2403, 'dummy_link_2402': t0.dummy_link_2402, 'n_out': t0.n_out, 'page_rank_0_2394': t0.page_rank_0_2394, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2403': t1.consider_link_2403, 'dummy_link_2402': t1.dummy_link_2402, 'l_target': t1.l_target, 'n_out': t0.n_out, 
'page_rank_0_2394': t0.page_rank_0_2394}) + FILTER(condition=dummy_link_2392, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2394': page_rank_0_2394, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2392': dummy_link_2392, 'n_out': n_out, 'page_rank_0_2394': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2393 * page_rank_0_2384 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2393': t0.consider_link_2393, 'dummy_link_2392': t0.dummy_link_2392, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2393': t1.consider_link_2393, 'dummy_link_2392': t1.dummy_link_2392, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384}) + FILTER(condition=dummy_link_2382, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2384': page_rank_0_2384, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2382': dummy_link_2382, 'n_out': n_out, 'page_rank_0_2384': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2383 * page_rank_0_2374 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2383': t0.consider_link_2383, 'dummy_link_2382': t0.dummy_link_2382, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2383': t1.consider_link_2383, 'dummy_link_2382': t1.dummy_link_2382, 
'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374}) + FILTER(condition=dummy_link_2372, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2374': page_rank_0_2374, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2372': dummy_link_2372, 'n_out': n_out, 'page_rank_0_2374': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2373 * page_rank_0_2364 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2373': t0.consider_link_2373, 'dummy_link_2372': t0.dummy_link_2372, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2373': t1.consider_link_2373, 'dummy_link_2372': t1.dummy_link_2372, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364}) + FILTER(condition=dummy_link_2362, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2364': page_rank_0_2364, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2362': dummy_link_2362, 'n_out': n_out, 'page_rank_0_2364': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2363 * page_rank_0_2354 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2363': t0.consider_link_2363, 'dummy_link_2362': t0.dummy_link_2362, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2363': t1.consider_link_2363, 
'dummy_link_2362': t1.dummy_link_2362, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354}) + FILTER(condition=dummy_link_2352, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2354': page_rank_0_2354, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2352': dummy_link_2352, 'n_out': n_out, 'page_rank_0_2354': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2353 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2353': t0.consider_link_2353, 'dummy_link_2352': t0.dummy_link_2352, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2353': t1.consider_link_2353, 'dummy_link_2352': t1.dummy_link_2352, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': 
t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2353': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2352': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2363': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2362': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2373': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2372': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': 
l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2383': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2382': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2393': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2392': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2403': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2402': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2413': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2412': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_sql_refsols/pagerank_h8_sqlite.sql b/tests/test_sql_refsols/pagerank_h8_sqlite.sql new file mode 100644 index 000000000..017bc5921 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_h8_sqlite.sql @@ -0,0 +1,193 @@ +WITH _t21 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t21 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) 
AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t16 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + NOT _t22.l_target IS NULL AND _t22.l_source = _t22.l_target AS dummy_link, + _s2.n_out, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t22 + ON _s2.anything_s_key = _t22.l_source + JOIN _t21 AS _s5 + ON _s5.s_key = _t22.l_target OR _t22.l_target IS NULL +), _t14 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t16.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _t16.page_rank_0 + ) AS REAL) / _t16.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_2354, + _t16.anything_n, + NOT _t23.l_target IS NULL AND _t23.l_source = _t23.l_target AS dummy_link_2352, + _t16.n_out, + _s9.s_key + FROM _t16 AS _t16 + JOIN _s1 AS _t23 + ON _t16.s_key = _t23.l_source + JOIN _t21 AS _s9 + ON _s9.s_key = _t23.l_target OR _t23.l_target IS NULL + WHERE + _t16.dummy_link +), _t12 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t14.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t24.l_source <> _t24.l_target OR _t24.l_target IS NULL AS INTEGER) * _t14.page_rank_0_2354 + ) AS REAL) / _t14.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_2364, + _t14.anything_n, + NOT _t24.l_target IS NULL AND _t24.l_source = _t24.l_target AS dummy_link_2362, + _t14.n_out, + _s13.s_key + FROM _t14 AS _t14 + JOIN _s1 AS _t24 + ON _t14.s_key = _t24.l_source + JOIN _t21 AS _s13 + ON _s13.s_key = _t24.l_target OR _t24.l_target IS NULL + WHERE + _t14.dummy_link_2352 +), _t10 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / 
_t12.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t25.l_source <> _t25.l_target OR _t25.l_target IS NULL AS INTEGER) * _t12.page_rank_0_2364 + ) AS REAL) / _t12.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_2374, + _t12.anything_n, + NOT _t25.l_target IS NULL AND _t25.l_source = _t25.l_target AS dummy_link_2372, + _t12.n_out, + _s17.s_key + FROM _t12 AS _t12 + JOIN _s1 AS _t25 + ON _t12.s_key = _t25.l_source + JOIN _t21 AS _s17 + ON _s17.s_key = _t25.l_target OR _t25.l_target IS NULL + WHERE + _t12.dummy_link_2362 +), _t8 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t10.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t26.l_source <> _t26.l_target OR _t26.l_target IS NULL AS INTEGER) * _t10.page_rank_0_2374 + ) AS REAL) / _t10.n_out + ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_2384, + _t10.anything_n, + NOT _t26.l_target IS NULL AND _t26.l_source = _t26.l_target AS dummy_link_2382, + _t10.n_out, + _s21.s_key + FROM _t10 AS _t10 + JOIN _s1 AS _t26 + ON _t10.s_key = _t26.l_source + JOIN _t21 AS _s21 + ON _s21.s_key = _t26.l_target OR _t26.l_target IS NULL + WHERE + _t10.dummy_link_2372 +), _t6 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t8.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t27.l_source <> _t27.l_target OR _t27.l_target IS NULL AS INTEGER) * _t8.page_rank_0_2384 + ) AS REAL) / _t8.n_out + ) OVER (PARTITION BY _s25.s_key) AS page_rank_0_2394, + _t8.anything_n, + NOT _t27.l_target IS NULL AND _t27.l_source = _t27.l_target AS dummy_link_2392, + _t8.n_out, + _s25.s_key + FROM _t8 AS _t8 + JOIN _s1 AS _t27 + ON _t27.l_source = _t8.s_key + JOIN _t21 AS _s25 + ON _s25.s_key = _t27.l_target OR _t27.l_target IS NULL + WHERE + _t8.dummy_link_2382 +), _t4 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t6.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t28.l_source <> _t28.l_target OR _t28.l_target IS NULL AS INTEGER) * _t6.page_rank_0_2394 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s29.s_key) AS page_rank_0_2404, + 
_t6.anything_n, + NOT _t28.l_target IS NULL AND _t28.l_source = _t28.l_target AS dummy_link_2402, + _t6.n_out, + _s29.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t28 + ON _t28.l_source = _t6.s_key + JOIN _t21 AS _s29 + ON _s29.s_key = _t28.l_target OR _t28.l_target IS NULL + WHERE + _t6.dummy_link_2392 +), _t2 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t4.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t29.l_source <> _t29.l_target OR _t29.l_target IS NULL AS INTEGER) * _t4.page_rank_0_2404 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s33.s_key) AS page_rank_0_2414, + NOT _t29.l_target IS NULL AND _t29.l_source = _t29.l_target AS dummy_link_2412, + _s33.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t29 + ON _t29.l_source = _t4.s_key + JOIN _t21 AS _s33 + ON _s33.s_key = _t29.l_target OR _t29.l_target IS NULL + WHERE + _t4.dummy_link_2402 +) +SELECT + s_key AS key, + ROUND(page_rank_0_2414, 5) AS page_rank +FROM _t2 +WHERE + dummy_link_2412 +ORDER BY + s_key From 80a9ca0e6a47c978455bdd5d4e8caaf406076ce8 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 17 Jul 2025 22:09:12 -0400 Subject: [PATCH 038/143] Moved around more errors, got rid of redundant ones, and fixed a cross-related bug --- pydough/conversion/relational_converter.py | 3 +- pydough/errors/pydough_error_builder.py | 149 +++++++++++++++++- pydough/evaluation/evaluate_unqualified.py | 28 +--- pydough/pydough_operators/base_operator.py | 11 +- .../expression_operators/binary_operators.py | 3 +- .../expression_operator.py | 11 +- pydough/qdag/README.md | 26 ++- pydough/qdag/collections/calculate.py | 60 +------ pydough/qdag/collections/collection_access.py | 7 +- pydough/qdag/collections/collection_qdag.py | 13 +- pydough/qdag/collections/global_context.py | 8 +- pydough/qdag/collections/order_by.py | 32 +--- pydough/qdag/collections/partition_by.py | 44 +----- pydough/qdag/collections/partition_child.py | 9 ++ pydough/qdag/collections/top_k.py | 5 +- pydough/qdag/collections/where.py | 31 +--- 
.../expressions/child_reference_expression.py | 6 +- pydough/qdag/expressions/column_property.py | 7 +- pydough/qdag/expressions/reference.py | 6 +- pydough/qdag/expressions/sided_reference.py | 6 +- pydough/qdag/node_builder.py | 39 +++-- pydough/unqualified/qualification.py | 35 ++-- pydough/unqualified/unqualified_transform.py | 2 +- tests/test_exploration.py | 2 +- tests/test_pipeline_tpch_custom.py | 10 ++ tests/test_plan_refsols/simple_cross_13.txt | 14 ++ .../exploration_examples.py | 5 +- .../simple_pydough_functions.py | 11 ++ tests/test_qdag_collection.py | 7 +- tests/test_qualification_errors.py | 44 ++++++ tests/testing_utilities.py | 22 +-- 31 files changed, 347 insertions(+), 309 deletions(-) create mode 100644 tests/test_plan_refsols/simple_cross_13.txt diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 956bb3698..cf94c88aa 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1315,8 +1315,7 @@ def preprocess_root( for _, column in output_cols: final_terms.append((column, Reference(node, column))) children: list[PyDoughCollectionQDAG] = [] - final_calc: Calculate = Calculate(node, children).with_terms(final_terms) - return final_calc + return Calculate(node, children, final_terms) def make_relational_ordering( diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py index cd6f423e0..33c093c4b 100644 --- a/pydough/errors/pydough_error_builder.py +++ b/pydough/errors/pydough_error_builder.py @@ -7,7 +7,8 @@ from pydough.errors import PyDoughException, PyDoughQDAGException if TYPE_CHECKING: - from pydough.qdag import PyDoughCollectionQDAG + from pydough.pydough_operators import PyDoughOperator + from pydough.qdag import PyDoughCollectionQDAG, PyDoughExpressionQDAG class PyDoughErrorBuilder: @@ -35,3 +36,149 @@ def term_not_found( return PyDoughQDAGException( collection.name_mismatch_error(term_name, atol=2, 
rtol=0.1, min_names=3) ) + + def down_streaming_conflict( + self, collection: "PyDoughCollectionQDAG", term_name: str + ) -> PyDoughException: + """ + Creates an exception for when a term is accessed within a collection but + it is unclear whether it is a term of the collection or a term + downstreamed from an ancestor. + + Args: + `collection`: The collection in which the term is being accessed. + `term_name`: The name of the term that caused the ambiguity. + + Returns: + An exception indicating the name access ambiguity. + """ + return PyDoughQDAGException( + f"Unclear whether {term_name!r} refers to a term of the current context or ancestor of collection {collection!r}" + ) + + def cardinality_error( + self, collection: "PyDoughCollectionQDAG", expr: "PyDoughExpressionQDAG" + ) -> PyDoughException: + """ + Creates an exception for when a term is used within a context that + should be singular with regards to the context, but it is plural. + + Args: + `collection`: The collection in which the term is being accessed. + `expr`: The plural expression that was expected to be singular. + + Returns: + An exception indicating the cardinality error. + """ + return PyDoughQDAGException( + f"Expected all terms in {collection.standalone_string} to be singular, but encountered a plural expression: {expr}" + ) + + def expected_collection(self, expr: object) -> PyDoughException: + """ + Creates an exception for when a QDAG collection is expected but + something else is found. + """ + from pydough.qdag import PyDoughExpressionQDAG + + if isinstance(expr, PyDoughExpressionQDAG): + return PyDoughQDAGException( + f"Expected a collection, but received an expression: {expr}" + ) + else: + return PyDoughQDAGException( + f"Expected a collection, but received {expr.__class__.__name__}: {expr}" + ) + + def expected_expression(self, expr: object) -> PyDoughException: + """ + Creates an exception for when a QDAG expression is expected but + something else is found. 
+ """ + from pydough.qdag import PyDoughCollectionQDAG + + if isinstance(expr, PyDoughCollectionQDAG): + return PyDoughQDAGException( + f"Expected an expression, but received a collection: {expr}" + ) + else: + return PyDoughQDAGException( + f"Expected an expression, but received {expr.__class__.__name__}: {expr}" + ) + + def type_verification_fail( + self, operator: "PyDoughOperator", args: list[object], message: str + ) -> PyDoughException: + """ + Creates an exception for when type verification fails for an operator. + + Args: + `operator`: The operator that failed type verification. + `args`: The arguments passed to the operator. + `message`: The error message explaining the typing failure. + + Returns: + An exception indicating the type verification failure. + """ + arg_strings: list[str] = [str(arg) for arg in args] + return PyDoughQDAGException( + f"Invalid operator invocation {operator.to_string(arg_strings)!r}: {message}" + ) + + def type_inference_fail( + self, operator: "PyDoughOperator", args: list[object], message: str + ) -> PyDoughException: + """ + Creates an exception for when return type inference fails for an + expression function operator. + + Args: + `operator`: The operator that failed type inference. + `args`: The arguments passed to the operator. + `message`: The error message explaining the inference failure. + + Returns: + An exception indicating the type inference failure. + """ + arg_strings: list[str] = [str(arg) for arg in args] + return PyDoughQDAGException( + f"Unable to infer the return type of operator invocation {operator.to_string(arg_strings)!r}: {message}" + ) + + def bad_columns(self, columns: object) -> PyDoughException: + """ + Creates an exception for when the `columns` argument of `to_sql` or `to_df` is + not valid. + + Args: + `columns`: The columns argument that caused the error. + + Returns: + An exception indicating the bad `columns` argument. 
+ """ + if isinstance(columns, list): + for column in columns: + if not isinstance(column, str): + return PyDoughQDAGException( + f"Expected `columns` argument to be a list of strings, found {column.__class__.__name__}" + ) + return PyDoughQDAGException( + "Expected `columns` argument to be a non-empty list" + ) + elif isinstance(columns, dict): + for alias, column in columns.items(): + if not isinstance(alias, str): + return PyDoughQDAGException( + f"Expected `columns` argument to be a dictionary where the keys are strings, found {alias.__class__.__name__}" + ) + if not isinstance(column, str): + return PyDoughQDAGException( + f"Expected `columns` argument to be a dictionary where the values are strings, found {column.__class__.__name__}" + ) + return PyDoughQDAGException( + "Expected `columns` argument to be a non-empty dictionary" + ) + else: + return PyDoughQDAGException( + f"Expected `columns` argument to be a list or dictionary, found {columns.__class__.__name__}" + ) diff --git a/pydough/evaluation/evaluate_unqualified.py b/pydough/evaluation/evaluate_unqualified.py index cca3101c8..d7693941b 100644 --- a/pydough/evaluation/evaluate_unqualified.py +++ b/pydough/evaluation/evaluate_unqualified.py @@ -13,7 +13,6 @@ from pydough.conversion import convert_ast_to_relational from pydough.database_connectors import DatabaseContext from pydough.errors import ( - PyDoughQDAGException, PyDoughSessionException, ) from pydough.metadata import GraphMetadata @@ -84,25 +83,18 @@ def _load_column_selection(kwargs: dict[str, object]) -> list[tuple[str, str]] | return None elif isinstance(columns_arg, list): for column in columns_arg: - assert isinstance(column, str), ( - f"Expected column name in `columns` argument to be a string, found {column.__class__.__name__}" - ) + if not isinstance(column, str): + raise pydough.active_session.error_builder.bad_columns(columns_arg) result.append((column, column)) elif isinstance(columns_arg, dict): for alias, column in 
columns_arg.items(): - assert isinstance(alias, str), ( - f"Expected alias name in `columns` argument to be a string, found {column.__class__.__name__}" - ) - assert isinstance(column, str), ( - f"Expected column name in `columns` argument to be a string, found {column.__class__.__name__}" - ) + if not (isinstance(alias, str) and isinstance(column, str)): + raise pydough.active_session.error_builder.bad_columns(columns_arg) result.append((alias, column)) else: - raise PyDoughQDAGException( - f"Expected `columns` argument to be a list or dictionary, found {columns_arg.__class__.__name__}" - ) + raise pydough.active_session.error_builder.bad_columns(columns_arg) if len(result) == 0: - raise PyDoughQDAGException("Column selection must not be empty") + raise pydough.active_session.error_builder.bad_columns(columns_arg) return result @@ -128,9 +120,7 @@ def to_sql(node: UnqualifiedNode, **kwargs) -> str: graph, config, database = _load_session_info(**kwargs) qualified: PyDoughQDAG = qualify_node(node, graph, config) if not isinstance(qualified, PyDoughCollectionQDAG): - raise PyDoughQDAGException( - f"Final qualified expression must be a collection, found {qualified.__class__.__name__}" - ) + raise pydough.active_session.error_builder.expected_collection(qualified) relational: RelationalRoot = convert_ast_to_relational( qualified, column_selection, config, database.dialect ) @@ -161,9 +151,7 @@ def to_df(node: UnqualifiedNode, **kwargs) -> pd.DataFrame: graph, config, database = _load_session_info(**kwargs) qualified: PyDoughQDAG = qualify_node(node, graph, config) if not isinstance(qualified, PyDoughCollectionQDAG): - raise PyDoughQDAGException( - f"Final qualified expression must be a collection, found {qualified.__class__.__name__}" - ) + raise pydough.active_session.error_builder.expected_collection(qualified) relational: RelationalRoot = convert_ast_to_relational( qualified, column_selection, config, database.dialect ) diff --git 
a/pydough/pydough_operators/base_operator.py b/pydough/pydough_operators/base_operator.py index a7750c1a3..1983eb406 100644 --- a/pydough/pydough_operators/base_operator.py +++ b/pydough/pydough_operators/base_operator.py @@ -61,12 +61,11 @@ def verify_allows_args(self, args: list[Any]) -> None: try: self.verifier.accepts(args) except PyDoughQDAGException as e: - # If the verifier failed, raise the error with the same traceback - # but prepend it with information about the operator and args - # that caused the failure. - arg_strings: list[str] = [str(arg) for arg in args] - msg = f"Invalid operator invocation {self.to_string(arg_strings)!r}: {e}" - raise PyDoughQDAGException(msg).with_traceback(e.__traceback__) + import pydough + + raise pydough.active_session.error_builder.type_verification_fail( + self, args, str(e) + ) @abstractmethod def to_string(self, arg_strings: list[str]) -> str: diff --git a/pydough/pydough_operators/expression_operators/binary_operators.py b/pydough/pydough_operators/expression_operators/binary_operators.py index 2fb74135b..eeed8319b 100644 --- a/pydough/pydough_operators/expression_operators/binary_operators.py +++ b/pydough/pydough_operators/expression_operators/binary_operators.py @@ -6,7 +6,6 @@ from enum import Enum -from pydough.errors import PyDoughQDAGException from pydough.pydough_operators.type_inference import ( ExpressionTypeDeducer, TypeVerifier, @@ -44,7 +43,7 @@ def from_string(s: str) -> "BinOp": for op in BinOp.__members__.values(): if s == op.value: return op - raise PyDoughQDAGException(f"Unrecognized operation: {s!r}") + raise ValueError(f"Unrecognized operation: {s!r}") BinOp.__members__.items() diff --git a/pydough/pydough_operators/expression_operators/expression_operator.py b/pydough/pydough_operators/expression_operators/expression_operator.py index 8e437dae0..7b801342f 100644 --- a/pydough/pydough_operators/expression_operators/expression_operator.py +++ 
b/pydough/pydough_operators/expression_operators/expression_operator.py @@ -7,6 +7,7 @@ from abc import abstractmethod from typing import Any +from pydough.errors import PyDoughQDAGException from pydough.pydough_operators.base_operator import PyDoughOperator from pydough.pydough_operators.type_inference import ( ExpressionTypeDeducer, @@ -97,4 +98,12 @@ def infer_return_type(self, args: list[Any]) -> PyDoughType: Raises: `PyDoughQDAGException` if `args` is invalid for this operator. """ - return self.deducer.infer_return_type(args) + + try: + return self.deducer.infer_return_type(args) + except PyDoughQDAGException as e: + import pydough + + raise pydough.active_session.error_builder.type_inference_fail( + self, args, str(e) + ) diff --git a/pydough/qdag/README.md b/pydough/qdag/README.md index 4284e4b1a..8b4fd054b 100644 --- a/pydough/qdag/README.md +++ b/pydough/qdag/README.md @@ -79,8 +79,7 @@ child_reference_node = builder.build_child_reference_expression([child_collectio # Build a CALCULATE node # Equivalent PyDough code: `TPCH.Nations.CALCULATE(region_name=region.name)` -calculate_node = builder.build_calc(table_collection, [child_collection]) -calculate_node = calculate_node.with_terms([("region_name", child_reference_node)]) +calculate_node = builder.build_calc(table_collection, [child_collection], [("region_name", child_reference_node)]) # Build a WHERE node # Equivalent PyDough code: `TPCH.Nations.WHERE(region.name == "ASIA")` @@ -88,8 +87,7 @@ condition = builder.build_expression_function_call( "EQU", [child_reference_node, builder.build_literal("ASIA", StringType())] ) -where_node = builder.build_where(table_collection, [child_collection]) -where_node = where_node.with_condition(condition) +where_node = builder.build_where(table_collection, [child_collection], condition) # Build a SINGULAR node # Equivalent PyDough code: `Regions.CALCULATE(n_4_nation=nations.WHERE(key == 4).SINGULAR().name)` @@ -103,15 +101,13 @@ key_ref = 
builder.build_reference(nations_sub_collection, "key") literal_4 = builder.build_literal(4, NumericType()) condition = builder.build_expression_function_call("EQU", [key_ref, literal_4]) # Build WHERE node with condition -where_node = builder.build_where(nations_sub_collection, []) -where_node = where_node.with_condition(condition) +where_node = builder.build_where(nations_sub_collection, [], condition) # Create SINGULAR node from filtered result singular_node = builder.build_singular(where_node) # Build reference node for name reference_node = builder.build_reference(singular_node, "name") # Build CALCULATE node with calculated term -calculate_node = builder.build_calc(regions_collection, [nations_sub_collection]) -calculate_node = calculate_node.with_terms([("n_4_nation", reference_node)]) +calculate_node = builder.build_calc(regions_collection, [nations_sub_collection], [("n_4_nation", reference_node)]) # Build an ORDER BY node @@ -119,20 +115,17 @@ calculate_node = calculate_node.with_terms([("n_4_nation", reference_node)]) collation_expression = builder.build_collation_expression( reference_node, True, False ) -order_by_node = builder.build_order(table_collection, []) -order_by_node = order_by_node.with_collation([collation_expression]) +order_by_node = builder.build_order(table_collection, [], [collation_expression]) # Build a TOP K node # Equivalent PyDough code: `TPCH.Nations.TOP_K(5, by=name.ASC(na_pos='first'))` -top_k_node = builder.build_top_k(table_collection, [], 5) -top_k_node = top_k_node.with_collation([collation_expression]) +top_k_node = builder.build_top_k(table_collection, [], 5, [collation_expression]) # Build a PARTITION BY node # Equivalent PyDough code: `TPCH.PARTITION(Parts, name="p", by=part_type)` part_collection = builder.build_child_access("Parts", global_context_node) partition_key = builder.build_reference(part_collection, "part_type") -partition_by_node = builder.build_partition(part_collection, child_collection, "p") 
-partition_by_node = partition_by_node.with_keys([partition_key]) +partition_by_node = builder.build_partition(part_collection, child_collection, "p", [partition_key]) # Build a child reference collection node # Equivalent PyDough code: `Nations.CALCULATE(n_customers=COUNT(customers))` @@ -146,8 +139,7 @@ count_call = builder.build_expression_function_call( "COUNT", [child_reference_collection_node] ) -calculate_node = builder.build_calc(table_collection, [customers_child]) -calculate_node = calculate_node.with_terms([("n_customers", count_call)]) +calculate_node = builder.build_calc(table_collection, [customers_child], [("n_customers", count_call)]) # Build a window function call node # Equivalent PyDough code: `RANKING(by=TPCH.Nations.name, levels=1, allow_ties=True)` @@ -157,7 +149,7 @@ window_call_node = builder.build_window_call(RANKING, [reference_node], 1, {"all ### HAS/HASNOT Rewrite -The `has_hasnot_rewrite` function is used to transform `HAS` and `HASNOT` expressions in the QDAG. It is used in the `with_terms`, `with_condition`, and `with_collation` calls of the various child operator classes to rewrite all `HAS(x)` into `COUNT(X) > 0` and all `HASNOT(X)` into `COUNT(X) == 0` unless they are in the conjunction of a `WHERE` clause. +The `has_hasnot_rewrite` function is used to transform `HAS` and `HASNOT` expressions in the QDAG. It is used in constructors of the various child operator classes to rewrite all `HAS(x)` into `COUNT(X) > 0` and all `HASNOT(X)` into `COUNT(X) == 0` unless they are in the conjunction of a `WHERE` clause. Below are some examples of PyDough snippets that are/aren't affected by the rewrite. 
diff --git a/pydough/qdag/collections/calculate.py b/pydough/qdag/collections/calculate.py index b4d48664b..585b8c476 100644 --- a/pydough/qdag/collections/calculate.py +++ b/pydough/qdag/collections/calculate.py @@ -12,9 +12,7 @@ from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( - BackReferenceExpression, PyDoughExpressionQDAG, - Reference, ) from pydough.qdag.has_hasnot_rewrite import has_hasnot_rewrite @@ -31,63 +29,20 @@ def __init__( self, predecessor: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + terms: list[tuple[str, PyDoughExpressionQDAG]], ): super().__init__(predecessor, children) - # Not initialized until with_terms is called - self._calc_term_indices: dict[str, int] | None = None - self._calc_term_values: dict[str, PyDoughExpressionQDAG] | None = None + self._calc_term_indices: dict[str, int] = {} + self._calc_term_values: dict[str, PyDoughExpressionQDAG] = {} self._all_term_names: set[str] = set() self._ancestral_mapping: dict[str, int] = dict( predecessor.ancestral_mapping.items() ) self._calc_terms: set[str] = set() - def with_terms(self, terms: list[tuple[str, PyDoughExpressionQDAG]]) -> "Calculate": - """ - Specifies the terms that are calculated inside of a CALCULATE node, - returning the mutated CALCULATE node afterwards. This is called after - the CALCULATE node is created so that the terms can be expressions that - reference child nodes of the CALCULATE. However, this must be called - on the CALCULATE node before any properties are accessed by - `calc_terms`, `all_terms`, `to_string`, etc. - - Args: - `terms`: the list of terms calculated in the CALCULATE node as a - list of tuples in the form `(name, expression)`. Each `expression` - can contain `ChildReferenceExpression` instances that refer to a - property of one of the children of the CALCULATE node. 
- - Returns: - The mutated CALCULATE node (which has also been modified in-place). - - Raises: - `PyDoughQDAGException` if the terms have already been added to the - CALCULATE node. - """ - if self._calc_term_indices is not None: - raise PyDoughQDAGException( - "Cannot call `with_terms` on a CALCULATE node more than once" - ) # Include terms from the predecessor, with the terms from this # CALCULATE added in. - self._calc_term_indices = {} - self._calc_term_values = {} for idx, (name, value) in enumerate(terms): - ancestral_idx: int = self.ancestral_mapping.get(name, 0) - if ancestral_idx > 0: - # Ignore no-op back-references, e.g.: - # region(region_name=name).customers(region_name=region_name) - if not ( - ( - isinstance(value, BackReferenceExpression) - and value.back_levels == ancestral_idx - and value.term_name == name - ) - or isinstance(value, Reference) - ): - raise PyDoughQDAGException( - f"Cannot redefine term {name!r} in CALCULATE that is already defined in an ancestor" - ) self._calc_term_indices[name] = idx self._calc_term_values[name] = has_hasnot_rewrite(value, False) self._all_term_names.add(name) @@ -95,7 +50,6 @@ def with_terms(self, terms: list[tuple[str, PyDoughExpressionQDAG]]) -> "Calcula self.ancestral_mapping[name] = 0 self.all_terms.update(self.preceding_context.all_terms) self.verify_singular_terms(self._calc_term_values.values()) - return self @property def calc_term_indices( @@ -105,10 +59,6 @@ def calc_term_indices( Mapping of each named expression of the CALCULATE to the index of the ordinal position of the property when included in a CALCULATE. """ - if self._calc_term_indices is None: - raise PyDoughQDAGException( - "Cannot access `calc_term_indices` of a CALCULATE node before adding calc terms with `with_terms`" - ) return self._calc_term_indices @property @@ -119,10 +69,6 @@ def calc_term_values( Mapping of each named expression of the CALCULATE to the QDAG node for that expression. 
""" - if self._calc_term_values is None: - raise PyDoughQDAGException( - "Cannot access `_calc_term_values` of a CALCULATE node before adding calc terms with `with_terms`" - ) return self._calc_term_values @property diff --git a/pydough/qdag/collections/collection_access.py b/pydough/qdag/collections/collection_access.py index 75f167e6e..2fae30d5b 100644 --- a/pydough/qdag/collections/collection_access.py +++ b/pydough/qdag/collections/collection_access.py @@ -8,6 +8,7 @@ from functools import cache +import pydough from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, @@ -115,8 +116,8 @@ def get_term(self, term_name: str) -> PyDoughQDAG: # Verify that the ancestor name is not also a name in the current # context. if term_name in self.calc_terms: - raise PyDoughQDAGException( - f"Cannot have term name {term_name!r} used in an ancestor of collection {self!r}" + raise pydough.active_session.error_builder.down_streaming_conflict( + collection=self, term_name=term_name ) # Create a back-reference to the ancestor term. 
return BackReferenceExpression( @@ -148,7 +149,7 @@ def get_term_from_property(self, term_name: str) -> PyDoughQDAG: elif isinstance(property, TableColumnMetadata): return ColumnProperty(property) else: - raise PyDoughQDAGException( + raise NotImplementedError( f"Unsupported property type for collection access: {property.__class__.name}" ) diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index d7a7e9b3d..a68ba1468 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -13,7 +13,6 @@ import numpy as np import pydough -from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions.collation_expression import CollationExpression from pydough.qdag.expressions.expression_qdag import PyDoughExpressionQDAG @@ -172,8 +171,8 @@ def verify_singular_terms(self, exprs: Iterable[PyDoughExpressionQDAG]) -> None: relative_context: PyDoughCollectionQDAG = self.starting_predecessor for expr in exprs: if not expr.is_singular(relative_context): - raise PyDoughQDAGException( - f"Expected all terms in {self.standalone_string} to be singular, but encountered a plural expression: {expr.to_string()}" + raise pydough.active_session.error_builder.cardinality_error( + collection=self, expr=expr ) @abstractmethod @@ -224,9 +223,7 @@ def get_expr(self, term_name: str) -> PyDoughExpressionQDAG: """ term = self.get_term(term_name) if not isinstance(term, PyDoughExpressionQDAG): - raise PyDoughQDAGException( - f"Property {term_name!r} of {self} is not an expression" - ) + raise pydough.active_session.error_builder.expected_expression(term) return term def get_collection(self, term_name: str) -> "PyDoughCollectionQDAG": @@ -243,9 +240,7 @@ def get_collection(self, term_name: str) -> "PyDoughCollectionQDAG": """ term = self.get_term(term_name) if not isinstance(term, PyDoughCollectionQDAG): - raise 
PyDoughQDAGException( - f"Property {term_name!r} of {self} is not a collection" - ) + raise pydough.active_session.error_builder.expected_collection(term) return term @property diff --git a/pydough/qdag/collections/global_context.py b/pydough/qdag/collections/global_context.py index af034810c..38f56154d 100644 --- a/pydough/qdag/collections/global_context.py +++ b/pydough/qdag/collections/global_context.py @@ -109,10 +109,14 @@ def unique_terms(self) -> list[str]: return [] def is_singular(self, context: PyDoughCollectionQDAG) -> bool: - raise PyDoughQDAGException(f"Cannot call is_singular on {self!r}") + return ( + self.ancestor_context is None + or self.ancestor_context.starting_predecessor == context + or self.ancestor_context.is_singular(context) + ) def get_expression_position(self, expr_name: str) -> int: - raise PyDoughQDAGException(f"Cannot call get_expression_position on {self!r}") + raise NotImplementedError(f"Cannot call get_expression_position on {self!r}") def get_term(self, term_name: str) -> PyDoughQDAG: self.verify_term_exists(term_name) diff --git a/pydough/qdag/collections/order_by.py b/pydough/qdag/collections/order_by.py index 0322f5310..54e730a08 100644 --- a/pydough/qdag/collections/order_by.py +++ b/pydough/qdag/collections/order_by.py @@ -8,7 +8,6 @@ from functools import cache -from pydough.errors import PyDoughQDAGException from pydough.qdag.expressions import CollationExpression from pydough.qdag.has_hasnot_rewrite import has_hasnot_rewrite @@ -25,33 +24,9 @@ def __init__( self, predecessor: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + collation: list[CollationExpression], ): super().__init__(predecessor, children) - self._collation: list[CollationExpression] | None = None - - def with_collation(self, collation: list[CollationExpression]) -> "OrderBy": - """ - Specifies the expressions that are used to do the ordering in an - ORDERBY node returning the mutated ORDERBY node afterwards. 
This is - called after the ORDERBY node is created so that the terms can be - expressions that reference child nodes of the ORDERBY. However, this - must be called on the ORDERBY node before any properties are accessed - by `calc_terms`, `all_terms`, `to_string`, etc. - - Args: - `collation`: the list of collation nodes to order by. - - Returns: - The mutated ORDERBY node (which has also been modified in-place). - - Raises: - `PyDoughQDAGException` if the condition has already been added to - the WHERE node. - """ - if self._collation is not None: - raise PyDoughQDAGException( - "Cannot call `with_collation` more than once per ORDERBY node" - ) self._collation = [ CollationExpression( has_hasnot_rewrite(col.expr, False), col.asc, col.na_last @@ -59,17 +34,12 @@ def with_collation(self, collation: list[CollationExpression]) -> "OrderBy": for col in collation ] self.verify_singular_terms(self._collation) - return self @property def collation(self) -> list[CollationExpression]: """ The ordering keys for the ORDERBY clause. 
""" - if self._collation is None: - raise PyDoughQDAGException( - "Cannot access `collation` of an ORDERBY node before calling `with_collation`" - ) return self._collation @property diff --git a/pydough/qdag/collections/partition_by.py b/pydough/qdag/collections/partition_by.py index 0d315f213..f6d70f627 100644 --- a/pydough/qdag/collections/partition_by.py +++ b/pydough/qdag/collections/partition_by.py @@ -9,7 +9,6 @@ from functools import cache -from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( BackReferenceExpression, @@ -34,49 +33,28 @@ def __init__( ancestor: PyDoughCollectionQDAG, child: PyDoughCollectionQDAG, name: str, + keys: list[ChildReferenceExpression], ): super().__init__([child]) self._ancestor_context: PyDoughCollectionQDAG = ancestor self._child: PyDoughCollectionQDAG = child self._name: str = name - self._keys: list[PartitionKey] | None = None self._key_name_indices: dict[str, int] = {} self._ancestral_mapping: dict[str, int] = { name: level + 1 for name, level in ancestor.ancestral_mapping.items() } self._calc_terms: set[str] = set() self._all_terms: set[str] = set(self.ancestral_mapping) | {self.child.name} - - @property - def name(self) -> str: - return self._name - - def with_keys(self, keys: list[ChildReferenceExpression]) -> "PartitionBy": - """ - Specifies the references to the keys that should be used to partition - the child node. - - Args: - `keys`: the list of references to the keys to partition on. - - Returns: - The mutated PARTITION BY node (which has also been modified in-place). - - Raises: - `PyDoughQDAGException` if the keys have already been added to - the PARTITION BY node. 
- """ - if self._keys is not None: - raise PyDoughQDAGException( - "Cannot call `with_keys` more than once per PARTITION BY node" - ) self._keys = [PartitionKey(self, key) for key in keys] for idx, ref in enumerate(keys): self._key_name_indices[ref.term_name] = idx self._calc_terms.add(ref.term_name) self.all_terms.update(self._calc_terms) self.verify_singular_terms(self._keys) - return self + + @property + def name(self) -> str: + return self._name @property def ancestor_context(self) -> PyDoughCollectionQDAG: @@ -91,10 +69,6 @@ def keys(self) -> list[PartitionKey]: """ The partitioning keys for the PARTITION BY clause. """ - if self._keys is None: - raise PyDoughQDAGException( - "Cannot access `keys` of an PARTITION BY node before calling `with_keys`" - ) return self._keys @property @@ -103,10 +77,6 @@ def key_name_indices(self) -> dict[str, int]: The names of the partitioning keys for the PARTITION BY clause and the index they have in a CALCULATE. """ - if self._keys is None: - raise PyDoughQDAGException( - "Cannot access `keys` of an PARTITION BY node before calling `with_keys`" - ) return self._key_name_indices @property @@ -199,8 +169,4 @@ def to_tree_form(self, is_last: bool) -> CollectionTreeForm: return tree_form def equals(self, other: object) -> bool: - if self._keys is None: - raise PyDoughQDAGException( - "Cannot invoke `equals` before calling `with_keys`" - ) return isinstance(other, PartitionBy) and self._keys == other._keys diff --git a/pydough/qdag/collections/partition_child.py b/pydough/qdag/collections/partition_child.py index e10222ebf..9a461c1c6 100644 --- a/pydough/qdag/collections/partition_child.py +++ b/pydough/qdag/collections/partition_child.py @@ -8,6 +8,7 @@ from functools import cache +import pydough from pydough.qdag.expressions import ( BackReferenceExpression, CollationExpression, @@ -90,7 +91,15 @@ def inherited_downstreamed_terms(self) -> set[str]: @cache def get_term(self, term_name: str): self.verify_term_exists(term_name) + # 
Special handling of terms down-streamed from an ancestor of the + # partition child. if term_name in self.ancestral_mapping: + # Verify that the ancestor name is not also a name in the current + # context. + if term_name in self.calc_terms: + raise pydough.active_session.error_builder.down_streaming_conflict( + collection=self, term_name=term_name + ) return BackReferenceExpression( self, term_name, self.ancestral_mapping[term_name] ) diff --git a/pydough/qdag/collections/top_k.py b/pydough/qdag/collections/top_k.py index 5b8b2219f..eda830644 100644 --- a/pydough/qdag/collections/top_k.py +++ b/pydough/qdag/collections/top_k.py @@ -9,6 +9,8 @@ from functools import cache +from pydough.qdag.expressions.collation_expression import CollationExpression + from .collection_qdag import PyDoughCollectionQDAG from .order_by import OrderBy @@ -23,9 +25,10 @@ def __init__( predecessor: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], records_to_keep: int, + collation: list[CollationExpression], ): - super().__init__(predecessor, children) self._records_to_keep = records_to_keep + super().__init__(predecessor, children, collation) @property def records_to_keep(self) -> int: diff --git a/pydough/qdag/collections/where.py b/pydough/qdag/collections/where.py index 85d81bb69..fcb8e6d6e 100644 --- a/pydough/qdag/collections/where.py +++ b/pydough/qdag/collections/where.py @@ -8,7 +8,6 @@ from functools import cache -from pydough.errors import PyDoughQDAGException from pydough.qdag.expressions import PyDoughExpressionQDAG from pydough.qdag.has_hasnot_rewrite import has_hasnot_rewrite @@ -25,45 +24,17 @@ def __init__( self, predecessor: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + condition: PyDoughExpressionQDAG, ): super().__init__(predecessor, children) - self._condition: PyDoughExpressionQDAG | None = None - - def with_condition(self, condition: PyDoughExpressionQDAG) -> "Where": - """ - Specifies the condition that should be used by the WHERE 
node. This is - called after the WHERE node is created so that the condition can be an - expressions that reference child nodes of the WHERE. However, this must - be called on the WHERE node before any properties are accessed by - `to_string`, `equals`, etc. - - Args: - `condition`: the expression used to filter. - - Returns: - The mutated WHERE node (which has also been modified in-place). - - Raises: - `PyDoughQDAGException` if the condition has already been added to - the WHERE node. - """ - if self._condition is not None: - raise PyDoughQDAGException( - "Cannot call `with_condition` more than once per Where node" - ) self._condition = has_hasnot_rewrite(condition, True) self.verify_singular_terms([self._condition]) - return self @property def condition(self) -> PyDoughExpressionQDAG: """ The predicate expression for the WHERE clause. """ - if self._condition is None: - raise PyDoughQDAGException( - "Cannot access `condition` of a WHERE node before adding the predicate with `with_condition`" - ) return self._condition @property diff --git a/pydough/qdag/expressions/child_reference_expression.py b/pydough/qdag/expressions/child_reference_expression.py index aeaa9e0de..72294f312 100644 --- a/pydough/qdag/expressions/child_reference_expression.py +++ b/pydough/qdag/expressions/child_reference_expression.py @@ -8,7 +8,6 @@ from functools import cache -from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG @@ -29,10 +28,7 @@ def __init__( self._child_idx: int = child_idx self._term_name: str = term_name self._expression: PyDoughExpressionQDAG = self._collection.get_expr(term_name) - if not self.expression.is_singular(collection.starting_predecessor): - raise PyDoughQDAGException( - f"Cannot reference plural expression {self.expression} from {self.collection}" - ) + collection.verify_singular_terms([self.expression]) @property def 
child_idx(self) -> int: diff --git a/pydough/qdag/expressions/column_property.py b/pydough/qdag/expressions/column_property.py index 5756f7f13..d44e2ab38 100644 --- a/pydough/qdag/expressions/column_property.py +++ b/pydough/qdag/expressions/column_property.py @@ -5,7 +5,7 @@ __all__ = ["ColumnProperty"] -from pydough.errors import PyDoughQDAGException +from pydough.metadata.collections import SimpleTableMetadata from pydough.metadata.properties import TableColumnMetadata from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.types import PyDoughType @@ -46,10 +46,7 @@ def requires_enclosing_parens(self, parent: PyDoughExpressionQDAG) -> bool: return False def to_string(self, tree_form: bool = False) -> str: - if not hasattr(self.column_property.collection, "table_path"): - raise PyDoughQDAGException( - f"collection of {self.column_property.error_name} does not have a 'table_path' field" - ) + assert isinstance(self.column_property.collection, SimpleTableMetadata) table_path: str = self.column_property.collection.table_path column_name: str = self.column_property.column_name return f"Column[{table_path}.{column_name}]" diff --git a/pydough/qdag/expressions/reference.py b/pydough/qdag/expressions/reference.py index ebbb5c468..47b0d39cc 100644 --- a/pydough/qdag/expressions/reference.py +++ b/pydough/qdag/expressions/reference.py @@ -6,7 +6,6 @@ __all__ = ["Reference"] -from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG from pydough.types import PyDoughType @@ -24,10 +23,7 @@ def __init__(self, collection: PyDoughCollectionQDAG, term_name: str): self._collection: PyDoughCollectionQDAG = collection self._term_name: str = term_name self._expression: PyDoughExpressionQDAG = collection.get_expr(term_name) - if not self.expression.is_singular(collection.starting_predecessor): - raise PyDoughQDAGException( - f"Cannot reference 
plural expression {self.expression} from {self.collection}" - ) + collection.verify_singular_terms([self._expression]) @property def collection(self) -> PyDoughCollectionQDAG: diff --git a/pydough/qdag/expressions/sided_reference.py b/pydough/qdag/expressions/sided_reference.py index c8b150fcb..a53a75587 100644 --- a/pydough/qdag/expressions/sided_reference.py +++ b/pydough/qdag/expressions/sided_reference.py @@ -6,7 +6,6 @@ __all__ = ["SidedReference"] -from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG from pydough.types import PyDoughType @@ -34,10 +33,7 @@ def __init__( else: base_collection = collection.starting_predecessor self._expression: PyDoughExpressionQDAG = base_collection.get_expr(term_name) - if not self.expression.is_singular(collection.starting_predecessor): - raise PyDoughQDAGException( - f"Cannot reference plural expression {self.expression} from {self.collection}" - ) + collection.starting_predecessor.verify_singular_terms([self.expression]) self._is_parent: bool = is_parent @property diff --git a/pydough/qdag/node_builder.py b/pydough/qdag/node_builder.py index d20b26ad2..14eed78e1 100644 --- a/pydough/qdag/node_builder.py +++ b/pydough/qdag/node_builder.py @@ -257,95 +257,100 @@ def build_calculate( self, preceding_context: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + terms: list[tuple[str, PyDoughExpressionQDAG]], ) -> Calculate: """ - Creates a CALCULATE instance, but `with_terms` still needs to be called on - the output. + Creates a CALCULATE instance. Args: `preceding_context`: the preceding collection. - `children`: the child collections accessed by the CALCULATE term. + `children`: the child collections accessed by the CALCULATE clause. + `terms`: the terms to be defined in the CALCULATE. Returns: - The newly created PyDough CALCULATE term. + The newly created PyDough CALCULATE clause. 
""" - return Calculate(preceding_context, children) + return Calculate(preceding_context, children, terms) def build_where( self, preceding_context: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + condition: PyDoughExpressionQDAG, ) -> Where: """ - Creates a WHERE instance, but `with_condition` still needs to be called on - the output. + Creates a WHERE instance. Args: `preceding_context`: the preceding collection. `children`: the child collections accessed by the WHERE term. + `condition`: the condition to be applied in the WHERE clause. Returns: The newly created PyDough WHERE instance. """ - return Where(preceding_context, children) + return Where(preceding_context, children, condition) def build_order( self, preceding_context: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + collation: list[CollationExpression], ) -> OrderBy: """ - Creates a ORDERBY instance, but `with_collation` still needs to be called on - the output. + Creates a ORDERBY instance. Args: `preceding_context`: the preceding collection. `children`: the child collections accessed by the ORDERBY term. + `collation`: the collation expressions to be used in the ORDERBY. Returns: The newly created PyDough ORDERBY instance. """ - return OrderBy(preceding_context, children) + return OrderBy(preceding_context, children, collation) def build_top_k( self, preceding_context: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], records_to_keep: int, + collation: list[CollationExpression], ) -> TopK: """ - Creates a TOP K instance, but `with_collation` still needs to be called on - the output. + Creates a TOP K instance. Args: `preceding_context`: the preceding collection. `children`: the child collections accessed by the ORDERBY term. `records_to_keep`: the `K` value in the TOP K. + `collation`: the collation expressions to be used in the TOP K. Returns: The newly created PyDough TOP K instance. 
""" - return TopK(preceding_context, children, records_to_keep) + return TopK(preceding_context, children, records_to_keep, collation) def build_partition( self, preceding_context: PyDoughCollectionQDAG, child: PyDoughCollectionQDAG, name: str, + keys: list[ChildReferenceExpression], ) -> PartitionBy: """ - Creates a PARTITION BY instance, but `with_keys` still needs to be called on - the output. + Creates a PARTITION BY instance. Args: `preceding_context`: the preceding collection. `child`: the child that is the input to the PARTITION BY term. `name`: the name that is used to refer to the partitioned data. + `keys`: the partitioning keys to be used in the PARTITION BY. Returns: The newly created PyDough PARTITION BY instance. """ - return PartitionBy(preceding_context, child, name) + return PartitionBy(preceding_context, child, name, keys) def build_child_reference_collection( self, diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index e6fdf3e44..8179436c0 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -19,7 +19,6 @@ ) from pydough.qdag import ( AstNodeBuilder, - Calculate, ChildOperatorChildAccess, ChildReferenceExpression, CollationExpression, @@ -27,7 +26,6 @@ ExpressionFunctionCall, GlobalContext, Literal, - OrderBy, PartitionBy, PyDoughCollectionQDAG, PyDoughExpressionQDAG, @@ -35,8 +33,6 @@ Reference, SidedReference, SubCollection, - TopK, - Where, WindowCall, ) from pydough.types import PyDoughType @@ -673,8 +669,7 @@ def qualify_calculate( qualified_term = self.qualify_expression(term, qualified_parent, children) qualified_terms.append((name, qualified_term)) # Use the qualified children & terms to create a new CALCULATE node. 
- calculate: Calculate = self.builder.build_calculate(qualified_parent, children) - return calculate.with_terms(qualified_terms) + return self.builder.build_calculate(qualified_parent, children, qualified_terms) def qualify_where( self, @@ -715,8 +710,7 @@ def qualify_where( unqualified_cond, qualified_parent, children ) # Use the qualified children & condition to create a new WHERE node. - where: Where = self.builder.build_where(qualified_parent, children) - return where.with_condition(qualified_cond) + return self.builder.build_where(qualified_parent, children, qualified_cond) def _expressions_to_collations( self, terms: Iterable[UnqualifiedNode] | list[UnqualifiedNode] @@ -801,8 +795,9 @@ def qualify_order_by( raise PyDoughUnqualifiedException( "ORDER BY requires a 'by' clause to be specified." ) - orderby: OrderBy = self.builder.build_order(qualified_parent, children) - return orderby.with_collation(qualified_collations) + return self.builder.build_order( + qualified_parent, children, qualified_collations + ) def qualify_top_k( self, @@ -858,10 +853,9 @@ def qualify_top_k( "TopK requires a 'by' clause to be specified." ) # Use the qualified children & collation to create a new TOP K node. - topk: TopK = self.builder.build_top_k( - qualified_parent, children, records_to_keep + return self.builder.build_top_k( + qualified_parent, children, records_to_keep, qualified_collations ) - return topk.with_collation(qualified_collations) def split_partition_ancestry( self, node: UnqualifiedNode, partition_ancestor: str | None = None @@ -1036,9 +1030,8 @@ def qualify_partition( child_references.append(child_ref) # Use the qualified child & keys to create a new PARTITION node. 
partition: PartitionBy = self.builder.build_partition( - qualified_parent, qualified_child, child_name + qualified_parent, qualified_child, child_name, child_references ) - partition = partition.with_keys(child_references) # Special case: if accessing as a child, wrap in a # ChildOperatorChildAccess term. if isinstance(unqualified_parent, UnqualifiedRoot) and is_child: @@ -1076,9 +1069,7 @@ def qualify_collection( unqualified, context, [], is_child, is_cross ) if not isinstance(answer, PyDoughCollectionQDAG): - raise PyDoughUnqualifiedException( - f"Expected a collection, but received an expression: {answer}" - ) + raise pydough.active_session.error_builder.expected_collection(answer) return answer def qualify_expression( @@ -1109,9 +1100,7 @@ def qualify_expression( unqualified, context, children, True, False ) if not isinstance(answer, PyDoughExpressionQDAG): - raise PyDoughUnqualifiedException( - f"Expected an expression, but received a collection: {answer}" - ) + raise pydough.active_session.error_builder.expected_expression(answer) return answer def qualify_singular( @@ -1193,8 +1182,8 @@ def qualify_best( # Build the final expanded window-based filter qualified_child: PyDoughCollectionQDAG = self.builder.build_where( - qualified_parent, children - ).with_condition(qualified_cond) + qualified_parent, children, qualified_cond + ) # Extract the `levels` argument from the condition assert isinstance(qualified_cond, ExpressionFunctionCall) diff --git a/pydough/unqualified/unqualified_transform.py b/pydough/unqualified/unqualified_transform.py index 3cea3782e..d5a2d9150 100644 --- a/pydough/unqualified/unqualified_transform.py +++ b/pydough/unqualified/unqualified_transform.py @@ -10,9 +10,9 @@ import types from typing import Any +from pydough.errors import PyDoughUnqualifiedException from pydough.metadata import GraphMetadata -from .errors import PyDoughUnqualifiedException from .unqualified_node import UnqualifiedNode diff --git a/tests/test_exploration.py 
b/tests/test_exploration.py index e9adc64a8..e74e11ef1 100644 --- a/tests/test_exploration.py +++ b/tests/test_exploration.py @@ -1161,7 +1161,7 @@ def test_graph_structure( ├─┬─ Partition[name='part_types', by=part_type] │ └─┬─ AccessChild │ └─── TableCollection[parts] - ├─┬─ Calculate[part_type=part_type, avg_price=AVG($1.retail_price)] + ├─┬─ Calculate[avg_price=AVG($1.retail_price)] │ └─┬─ AccessChild │ └─── PartitionChild[parts] └─┬─ Where[avg_price >= 27.5] diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 56a44feb8..539224e92 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -143,6 +143,7 @@ simple_cross_10, simple_cross_11, simple_cross_12, + simple_cross_13, simple_filter_top_five, simple_int_float_string_cast, simple_scan, @@ -2234,6 +2235,15 @@ ), id="simple_cross_12", ), + pytest.param( + PyDoughPandasTest( + simple_cross_13, + "TPCH", + lambda: pd.DataFrame({"n1": [142], "n2": [8]}), + "simple_cross_13", + ), + id="simple_cross_13", + ), pytest.param( PyDoughPandasTest( simple_var_std, diff --git a/tests/test_plan_refsols/simple_cross_13.txt b/tests/test_plan_refsols/simple_cross_13.txt new file mode 100644 index 000000000..de0ad16f7 --- /dev/null +++ b/tests/test_plan_refsols/simple_cross_13.txt @@ -0,0 +1,14 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1)], orderings=[]) + JOIN(condition=True:bool, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_acctbal <= min_balance + 10.0:numeric, columns={}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'min_balance': t1.min_balance}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'min_balance': MIN(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, 
aggregations={'n_rows': COUNT()}) + FILTER(condition=s_acctbal >= max_balance - 10.0:numeric, columns={}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'max_balance': t1.max_balance, 's_acctbal': t0.s_acctbal}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal}) + AGGREGATE(keys={}, aggregations={'max_balance': MAX(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal}) diff --git a/tests/test_pydough_functions/exploration_examples.py b/tests/test_pydough_functions/exploration_examples.py index 39d3abd12..4eb978303 100644 --- a/tests/test_pydough_functions/exploration_examples.py +++ b/tests/test_pydough_functions/exploration_examples.py @@ -106,10 +106,7 @@ def partition_impl() -> UnqualifiedNode: def partition_child_impl() -> UnqualifiedNode: return ( parts.PARTITION(name="part_types", by=part_type) - .CALCULATE( - part_type, - avg_price=AVG(parts.retail_price), - ) + .CALCULATE(avg_price=AVG(parts.retail_price)) .WHERE(avg_price >= 27.5) .parts ) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 6fc5248d8..4a8aeef6e 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -2970,6 +2970,17 @@ def simple_cross_12(): ) +def simple_cross_13(): + # Strange way to count how many customers have the an account balance + # within 10 of the global minimum, and how many suppliers have an account + # balance within 10 of the global maximum. 
+ glob1 = TPCH.CALCULATE(min_balance=MIN(customers.account_balance)) + cust = customers.WHERE(account_balance <= (CROSS(glob1).min_balance + 10.0)) + glob2 = TPCH.CALCULATE(max_balance=MAX(suppliers.account_balance)) + supp = suppliers.WHERE(account_balance >= (CROSS(glob2).max_balance - 10.0)) + return TPCH.CALCULATE(n1=COUNT(cust), n2=COUNT(supp)) + + def quantile_function_test_1(): selected_orders = customers.orders.WHERE(YEAR(order_date) == 1998) return TPCH.CALCULATE( diff --git a/tests/test_qdag_collection.py b/tests/test_qdag_collection.py index 600102ae7..8e38f601b 100644 --- a/tests/test_qdag_collection.py +++ b/tests/test_qdag_collection.py @@ -548,7 +548,6 @@ def region_intra_pct() -> tuple[CollectionTestInfo, str, str]: ) ** CalculateInfo( [SubCollectionInfo("parts")], - container=ReferenceInfo("container"), total_price=FunctionInfo( "SUM", [ChildReferenceExpressionInfo("retail_price", 0)] ), @@ -1241,7 +1240,6 @@ def test_collections_calc_terms( ) ** CalculateInfo( [SubCollectionInfo("parts")], - container=ReferenceInfo("container"), total_price=FunctionInfo( "SUM", [ChildReferenceExpressionInfo("retail_price", 0)] ), @@ -1259,14 +1257,14 @@ def test_collections_calc_terms( ], ), ), - "TPCH.Partition(parts.ORDER_BY(retail_price.DESC(na_pos='last')), name='containers', by=container).CALCULATE(container=container, total_price=SUM(parts.retail_price)).parts.CALCULATE(part_name=name, container=container, ratio=retail_price / total_price)", + "TPCH.Partition(parts.ORDER_BY(retail_price.DESC(na_pos='last')), name='containers', by=container).CALCULATE(total_price=SUM(parts.retail_price)).parts.CALCULATE(part_name=name, container=container, ratio=retail_price / total_price)", """ ──┬─ TPCH ├─┬─ Partition[name='containers', by=container] │ └─┬─ AccessChild │ ├─── TableCollection[parts] │ └─── OrderBy[retail_price.DESC(na_pos='last')] - └─┬─ Calculate[container=container, total_price=SUM($1.retail_price)] + └─┬─ Calculate[total_price=SUM($1.retail_price)] ├─┬─ 
AccessChild │ └─── PartitionChild[parts] ├─── PartitionChild[parts] @@ -2188,7 +2186,6 @@ def test_collections_to_string( ) ** CalculateInfo( [SubCollectionInfo("parts")], - container=ReferenceInfo("container"), total_price=FunctionInfo( "SUM", [ChildReferenceExpressionInfo("retail_price", 0)] ), diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index d5487aaeb..55b4b592c 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -269,6 +269,30 @@ def bad_pydough_impl_23(root: UnqualifiedNode) -> UnqualifiedNode: ) +def bad_pydough_impl_24(root: UnqualifiedNode) -> UnqualifiedNode: + # Conflict between `name` of nations vs customers + return root.nations.CALCULATE(root.name).customers.CALCULATE(root.name) + + +def bad_pydough_impl_25(root: UnqualifiedNode) -> UnqualifiedNode: + # Conflict between `name` of regions vs customers + return root.regions.CALCULATE(root.name).nations.customers.CALCULATE(root.name) + + +def bad_pydough_impl_26(root: UnqualifiedNode) -> UnqualifiedNode: + # Conflict between `n` of partition vs orders + return ( + root.orders.PARTITION("priorities", by=root.order_priority) + .CALCULATE(key=root.COUNT(root.orders)) + .orders.CALCULATE(root.key) + ) + + +def bad_pydough_impl_27(root: UnqualifiedNode) -> UnqualifiedNode: + # Treating CROSS as singular + return root.regions.CALCULATE(n1=root.name, n2=root.CROSS(root.regions).name) + + def bad_replace_too_many_args(root: UnqualifiedNode) -> UnqualifiedNode: # Too many arguments to replace return root.nations.CALCULATE( @@ -411,6 +435,26 @@ def bad_str_count_few_args(root: UnqualifiedNode) -> UnqualifiedNode: "PyDough nodes POPULATION_STD is not callable. 
Did you mean to use a function?", id="23", ), + pytest.param( + bad_pydough_impl_24, + "Unclear whether 'name' refers to a term of the current context or ancestor of collection TPCH.nations.CALCULATE(name=name).customers", + id="24", + ), + pytest.param( + bad_pydough_impl_25, + "Unclear whether 'name' refers to a term of the current context or ancestor of collection TPCH.regions.CALCULATE(name=name).nations.customers", + id="25", + ), + pytest.param( + bad_pydough_impl_26, + "Unclear whether 'key' refers to a term of the current context or ancestor of collection TPCH.Partition(orders, name='priorities', by=order_priority).CALCULATE(key=COUNT(orders)).orders", + id="26", + ), + pytest.param( + bad_pydough_impl_27, + "Expected all terms in CALCULATE(n1=name, n2=TPCH.regions.name) to be singular, but encountered a plural expression: TPCH.regions.name", + id="27", + ), pytest.param( bad_replace_too_many_args, "Expected between 2 and 3 arguments inclusive, received 4", diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index d3c2aef74..ac2274bcd 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -43,18 +43,13 @@ from pydough.pydough_operators import get_operator_by_name from pydough.qdag import ( AstNodeBuilder, - Calculate, ChildOperatorChildAccess, ChildReferenceExpression, CollationExpression, - OrderBy, - PartitionBy, PyDoughCollectionQDAG, PyDoughExpressionQDAG, PyDoughQDAG, Singular, - TopK, - Where, ) from pydough.relational import ( ColumnReference, @@ -630,13 +625,12 @@ def local_build( builder, context, ) - raw_calc: Calculate = builder.build_calculate(context, children) args: list[tuple[str, PyDoughExpressionQDAG]] = [] for name, info in self.args: expr = info.build(builder, context, children) assert isinstance(expr, PyDoughExpressionQDAG) args.append((name, expr)) - return raw_calc.with_terms(args) + return builder.build_calculate(context, children, args) class WhereInfo(ChildOperatorInfo): @@ -666,10 +660,9 @@ def 
local_build( "Must provide a context when building a WHERE clause." ) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) - raw_where: Where = builder.build_where(context, children) cond = self.condition.build(builder, context, children) assert isinstance(cond, PyDoughExpressionQDAG) - return raw_where.with_condition(cond) + return builder.build_where(context, children, cond) class SingularInfo(ChildOperatorInfo): @@ -740,13 +733,12 @@ def local_build( "Must provide context and children_contexts when building an ORDER BY clause." ) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) - raw_order: OrderBy = builder.build_order(context, children) collation: list[CollationExpression] = [] for info, asc, na_last in self.collation: expr = info.build(builder, context, children) assert isinstance(expr, PyDoughExpressionQDAG) collation.append(CollationExpression(expr, asc, na_last)) - return raw_order.with_collation(collation) + return builder.build_order(context, children, collation) class TopKInfo(ChildOperatorInfo): @@ -789,13 +781,12 @@ def local_build( "Must provide context and children_contexts when building a TOPK clause." 
) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) - raw_top_k: TopK = builder.build_top_k(context, children, self.records_to_keep) collation: list[CollationExpression] = [] for info, asc, na_last in self.collation: expr = info.build(builder, context, children) assert isinstance(expr, PyDoughExpressionQDAG) collation.append(CollationExpression(expr, asc, na_last)) - return raw_top_k.with_collation(collation) + return builder.build_top_k(context, children, self.records_to_keep, collation) class PartitionInfo(ChildOperatorInfo): @@ -830,15 +821,12 @@ def local_build( context = builder.build_global_context() children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) assert len(children) == 1 - raw_partition: PartitionBy = builder.build_partition( - context, children[0], self.name - ) keys: list[ChildReferenceExpression] = [] for info in self.keys: expr = info.build(builder, context, children) assert isinstance(expr, ChildReferenceExpression) keys.append(expr) - return raw_partition.with_keys(keys) + return builder.build_partition(context, children[0], self.name, keys) def make_relational_column_reference( From e812143cd0fb13897669fe57999b1ce6c88b9831 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 17 Jul 2025 22:33:14 -0400 Subject: [PATCH 039/143] Overhaul test_qualify_error --- tests/test_qualification_errors.py | 439 +++++------------------------ 1 file changed, 73 insertions(+), 366 deletions(-) diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index 55b4b592c..8446904a0 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -4,7 +4,6 @@ """ import re -from collections.abc import Callable import pytest @@ -13,7 +12,6 @@ from pydough.metadata import GraphMetadata from pydough.unqualified import ( UnqualifiedNode, - UnqualifiedRoot, qualify_node, ) from tests.testing_utilities import ( @@ -21,477 +19,186 @@ ) -def bad_pydough_impl_01(root: 
UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.nations.CALCULATE(nation_name=name, total_balance=SUM(account_balance)) - ``` - The problem: there is no property `account_balance` to be accessed from nations. - """ - return root.nations.CALCULATE( - nation_name=root.name, total_balance=root.SUM(root.account_balance) - ) - - -def bad_pydough_impl_02(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.nations.CALCULATE(nation_name=FIZZBUZZ(name)) - ``` - The problem: there is no function named FIZZBUZZ, so this looks like a - CALCULATE being done onto a subcollection, which cannot be used as an - expression inside a CALCULATE. - """ - return root.nations.CALCULATE(nation_name=root.FIZZBUZZ(root.name)) - - -def bad_pydough_impl_03(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.nations.CALCULATE(y=suppliers.CALCULATE(x=COUNT(supply_records)).x) - ``` - The problem: `suppliers.CALCULATE(x=COUNT(supply_records))` is plural with regards - to nations, so accessing its `x` property is still plural, therefore it - cannot be used as a term inside a CALCULATE from the context of nations. - """ - return root.nations.CALCULATE( - y=root.suppliers.CALCULATE(x=root.COUNT(root.supply_records)).x - ) - - -def bad_pydough_impl_04(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.nations.name.hello - ``` - The problem: nations.name is an expression, so invoking `.hello` on it is - not valid. 
- """ - return root.nations.name.hello - - -def bad_pydough_impl_05(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.Customer(r=nation.region) - ``` - The problem: nation.region is a collection, therefore cannot be used as - an expression in a CALCULATE. - """ - return root.customers.CALCULATE(r=root.nation.region) - - -def bad_pydough_impl_06(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.suppliers.supply_records.CALCULATE(o=lines.order.order_date) - ``` - The problem: lines is plural with regards to supply_records, therefore - lines.order.order_date is also plural and it cannot be used in a CALCULATE - in the context of supply_records. - """ - return root.suppliers.supply_records.CALCULATE(o=root.lines.order.order_date) - - -def bad_pydough_impl_07(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.lines.CALCULATE(v=MUL(extended_price, SUB(1, discount))) - ``` - The problem: there is no function named MUL or SUB, so this looks like a - CALCULATE operation on a subcollection, which cannot be used as an - expression inside of a CALCULATE. - """ - return root.lines.CALCULATE( - v=root.MUL(root.extended_price, root.SUB(1, root.discount)) - ) - - -def bad_pydough_impl_08(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.lines.tax = 0 - TPCH.lines.CALCULATE(value=extended_price * tax) - ``` - The problem: writing to an unqualified node is not yet supported. 
- """ - root.lines.tax = 0 - return root.lines.CALCULATE(value=root.extended_price * root.tax) - - -def bad_pydough_impl_09(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - best_customer = nations.customers.BEST(per='nations', by=account_balance.DESC()) - regions.CALCULATE(n=best_customer.name) - ``` - The problem: The cardinality is off since even though the `BEST` ensures - the customers are singular with regards to the nation, the nations are - still plural with regards to the region. - """ - best_customer = root.nations.customers.BEST( - per="nations", by=root.account_balance.DESC() - ) - return root.regions.CALCULATE(n=best_customer.name) - - -def bad_pydough_impl_10(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - best_customer = nations.customers.BEST(per='regions', by=account_balance.DESC(), allow_ties=True) - regions.CALCULATE(n=best_customer.name) - ``` - The problem: the presence of `allow_ties=True` means that the `BEST` - operator does not guarantee `nations.customers` is plural with regards to - `regions`. - """ - best_customer = root.nations.customers.BEST( - per="regions", by=root.account_balance.DESC(), allow_ties=True - ) - return root.regions.CALCULATE(n=best_customer.name) - - -def bad_pydough_impl_11(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - best_customer = nations.customers.BEST(per='regions', by=account_balance.DESC(), n_best=3) - regions.CALCULATE(n=best_customer.name) - ``` - The problem: the presence of `n_best=3` means that the `BEST` operator - does not guarantee `nations.customers` is plural with regards to `regions`. 
- """ - best_customer = root.nations.customers.BEST( - per="regions", by=root.account_balance.DESC(), allow_ties=True - ) - return root.regions.CALCULATE(n=best_customer.name) - - -def bad_pydough_impl_12(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - regions.nations.customers.BEST(per='regions', by=account_balance.DESC(), n_best=3, allow_ties=True) - ``` - The problem: cannot simultaneously use `n_best=3` and `allow_ties=True`. - """ - return root.regions.nations.customers.BEST( - per="regions", by=root.account_balance.DESC(), n_best=3, allow_ties=True - ) - - -def bad_pydough_impl_13(root: UnqualifiedNode) -> UnqualifiedNode: - # Non-existent per name - return root.customers.orders.CALCULATE(root.RANKING(by=root.key.ASC(), per="custs")) - - -def bad_pydough_impl_14(root: UnqualifiedNode) -> UnqualifiedNode: - # Bad index of valid per name - return root.customers.orders.CALCULATE( - root.RANKING(by=root.key.ASC(), per="customers:2") - ) - - -def bad_pydough_impl_15(root: UnqualifiedNode) -> UnqualifiedNode: - # Ambiguous per name - return root.customers.orders.customer.orders.lines.CALCULATE( - root.RANKING(by=root.extended_price.DESC(), per="orders") - ) - - -def bad_pydough_impl_16(root: UnqualifiedNode) -> UnqualifiedNode: - # Malformed per name - return root.customers.orders.CALCULATE( - root.RANKING(by=root.key.ASC(), per="customers:k") - ) - - -def bad_pydough_impl_17(root: UnqualifiedNode) -> UnqualifiedNode: - # Malformed per name - return root.customers.orders.CALCULATE( - root.RANKING(by=root.key.ASC(), per="customers:1:2") - ) - - -def bad_pydough_impl_18(root: UnqualifiedNode) -> UnqualifiedNode: - # Malformed per name - return root.customers.orders.CALCULATE( - root.RANKING(by=root.key.ASC(), per="customers:") - ) - - -def bad_pydough_impl_19(root: UnqualifiedNode) -> UnqualifiedNode: - # Malformed per name - return root.customers.orders.CALCULATE( - 
root.RANKING(by=root.key.ASC(), per="customers:0") - ) - - -def bad_pydough_impl_20(root: UnqualifiedNode) -> UnqualifiedNode: - # Internal function name - return root.Nations.CALCULATE( - name=root.name, - var=root.SAMPLE_VAR(root.suppliers.account_balance), - ) - - -def bad_pydough_impl_21(root: UnqualifiedNode) -> UnqualifiedNode: - # Internal function name - return root.Nations.CALCULATE( - name=root.name, - var=root.SAMPLE_VARIANCE(root.suppliers.account_balance), - ) - - -def bad_pydough_impl_22(root: UnqualifiedNode) -> UnqualifiedNode: - # Internal function name - return root.Nations.CALCULATE( - name=root.name, - var=root.SAMPLE_STD(root.suppliers.account_balance), - ) - - -def bad_pydough_impl_23(root: UnqualifiedNode) -> UnqualifiedNode: - # Internal function name - return root.Nations.CALCULATE( - name=root.name, - std=root.POPULATION_STD(root.suppliers.account_balance), - ) - - -def bad_pydough_impl_24(root: UnqualifiedNode) -> UnqualifiedNode: - # Conflict between `name` of nations vs customers - return root.nations.CALCULATE(root.name).customers.CALCULATE(root.name) - - -def bad_pydough_impl_25(root: UnqualifiedNode) -> UnqualifiedNode: - # Conflict between `name` of regions vs customers - return root.regions.CALCULATE(root.name).nations.customers.CALCULATE(root.name) - - -def bad_pydough_impl_26(root: UnqualifiedNode) -> UnqualifiedNode: - # Conflict between `n` of partition vs orders - return ( - root.orders.PARTITION("priorities", by=root.order_priority) - .CALCULATE(key=root.COUNT(root.orders)) - .orders.CALCULATE(root.key) - ) - - -def bad_pydough_impl_27(root: UnqualifiedNode) -> UnqualifiedNode: - # Treating CROSS as singular - return root.regions.CALCULATE(n1=root.name, n2=root.CROSS(root.regions).name) - - -def bad_replace_too_many_args(root: UnqualifiedNode) -> UnqualifiedNode: - # Too many arguments to replace - return root.nations.CALCULATE( - replace_name1=root.REPLACE(root.name, "a", "b", "c"), - ) - - -def bad_replace_few_args(root: 
UnqualifiedNode) -> UnqualifiedNode: - # Not enough arguments to replace - return root.nations.CALCULATE(replace_name2=root.REPLACE("a")) - - -def bad_str_count_too_many_args(root: UnqualifiedNode) -> UnqualifiedNode: - # Too many arguments to str_count - return root.nations.CALCULATE( - str_count1=root.STRCOUNT(root.name, "a", "b"), - ) - - -def bad_str_count_few_args(root: UnqualifiedNode) -> UnqualifiedNode: - # Not enough arguments to str_count - return root.nations.CALCULATE(str_count2=root.STRCOUNT(root.name)) - - @pytest.mark.parametrize( - "impl, error_msg", + "pydough_text, error_msg", [ pytest.param( - bad_pydough_impl_01, + "result = nations.CALCULATE(nation_name=name, total_balance=SUM(account_balance))", "Unrecognized term of TPCH.nations: 'account_balance'. Did you mean: comment, customers, name, region_key, suppliers, region?", - id="01", + id="bad_name", ), pytest.param( - bad_pydough_impl_02, + "result = nations.CALCULATE(nation_name=FIZZBUZZ(name))", "PyDough nodes FIZZBUZZ is not callable. 
Did you mean to use a function?", - id="02", + id="non_function", ), pytest.param( - bad_pydough_impl_03, + "result = nations.CALCULATE(y=suppliers.CALCULATE(x=COUNT(supply_records)).x)", "Expected all terms in CALCULATE(y=suppliers.CALCULATE(x=COUNT(supply_records)).x) to be singular, but encountered a plural expression: suppliers.CALCULATE(x=COUNT(supply_records)).x", - id="03", + id="bad_plural_1", ), pytest.param( - bad_pydough_impl_04, + "result = TPCH.nations.name.hello", "Expected a collection, but received an expression: TPCH.nations.name", - id="04", + id="expression_instead_of_collection", ), pytest.param( - bad_pydough_impl_05, + "result = customers.CALCULATE(r=nation.region)", "Expected an expression, but received a collection: nation.region", - id="05", + id="collection_instead_of_expression", ), pytest.param( - bad_pydough_impl_06, + "result = suppliers.supply_records.CALCULATE(o=lines.order.order_date)", "Expected all terms in CALCULATE(o=lines.order.order_date) to be singular, but encountered a plural expression: lines.order.order_date", - id="06", + id="bad_plural_2", ), pytest.param( - bad_pydough_impl_07, + "lines.CALCULATE(v=MUL(extended_price, SUB(1, discount)))", "PyDough nodes SUB is not callable. 
Did you mean to use a function?", - id="07", + id="binop_function_call", ), pytest.param( - bad_pydough_impl_08, + "TPCH.lines.tax = 0", "PyDough objects do not yet support writing properties to them.", - id="08", + id="setattr", ), pytest.param( - bad_pydough_impl_09, + "best_customer = nations.customers.BEST(per='nations', by=account_balance.DESC())\n" + "result = regions.CALCULATE(n=best_customer.name)", "Expected all terms in CALCULATE(n=nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=1, allow_ties=False) == 1).name) to be singular, but encountered a plural expression: nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=1, allow_ties=False) == 1).name", - id="09", + id="bad_best_1", ), pytest.param( - bad_pydough_impl_10, + "best_customer = nations.customers.BEST(per='regions', by=account_balance.DESC(), allow_ties=True)\n" + "result = regions.CALCULATE(n=best_customer.name)", "Expected all terms in CALCULATE(n=nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=True) == 1).name) to be singular, but encountered a plural expression: nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=True) == 1).name", - id="10", + id="bad_best_2", ), pytest.param( - bad_pydough_impl_11, - "Expected all terms in CALCULATE(n=nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=True) == 1).name) to be singular, but encountered a plural expression: nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=True) == 1).name", - id="11", + "best_customer = nations.customers.BEST(per='regions', by=account_balance.DESC(), n_best=3)\n" + "result = regions.CALCULATE(n=best_customer.name)", + "Expected all terms in CALCULATE(n=nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=False) <= 3).name) to be singular, but 
encountered a plural expression: nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=False) <= 3).name", + id="bad_best_3", ), pytest.param( - bad_pydough_impl_12, + "result = regions.nations.customers.BEST(per='regions', by=account_balance.DESC(), n_best=3, allow_ties=True)", "Cannot allow ties when multiple best values are requested", - id="12", + id="bad_best_4", ), pytest.param( - bad_pydough_impl_13, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='custs'))", "Per string refers to unrecognized ancestor 'custs' of TPCH.customers.orders", - id="13", + id="bad_per_1", ), pytest.param( - bad_pydough_impl_14, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:2'))", "Per string 'customers:2' invalid as there are not 2 ancestors of the current context with name 'customers'.", - id="14", + id="bad_per_2", ), pytest.param( - bad_pydough_impl_15, + "result = customers.orders.customer.orders.lines.CALCULATE(RANKING(by=extended_price.DESC(), per='orders'))", "Per string 'orders' is ambiguous for TPCH.customers.orders.customer.orders.lines. 
Use the form 'orders:index' to disambiguate, where 'orders:1' refers to the most recent ancestor.", - id="15", + id="bad_per_3", ), pytest.param( - bad_pydough_impl_16, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:k'))", "Malformed per string: 'customers:k' (expected the index after ':' to be a positive integer)", - id="16", + id="bad_per_4", ), pytest.param( - bad_pydough_impl_17, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:1:2'))", "Malformed per string: 'customers:1:2' (expected 0 or 1 ':', found 2)", - id="17", + id="bad_per_5", ), pytest.param( - bad_pydough_impl_18, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:'))", "Malformed per string: 'customers:' (expected the index after ':' to be a positive integer)", - id="18", + id="bad_per_6", ), pytest.param( - bad_pydough_impl_19, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:0'))", "Malformed per string: 'customers:0' (expected the index after ':' to be a positive integer)", - id="19", + id="bad_per_7", ), pytest.param( - bad_pydough_impl_20, + "result = nations.CALCULATE(name=name, var=SAMPLE_VAR(suppliers.account_balance))", "PyDough nodes SAMPLE_VAR is not callable. Did you mean to use a function?", - id="20", + id="kwargfunc_1", ), pytest.param( - bad_pydough_impl_21, + "result = nations.CALCULATE(name=name, var=SAMPLE_VARIANCE(suppliers.account_balance))", "PyDough nodes SAMPLE_VARIANCE is not callable. Did you mean to use a function?", - id="21", + id="kwargfunc_2", ), pytest.param( - bad_pydough_impl_22, + "result = nations.CALCULATE(name=name, var=SAMPLE_STD(suppliers.account_balance))", "PyDough nodes SAMPLE_STD is not callable. Did you mean to use a function?", - id="22", + id="kwargfunc_3", ), pytest.param( - bad_pydough_impl_23, + "result = nations.CALCULATE(name=name, std=POPULATION_STD(suppliers.account_balance))", "PyDough nodes POPULATION_STD is not callable. 
Did you mean to use a function?", - id="23", + id="kwargfunc_4", ), pytest.param( - bad_pydough_impl_24, + "result = nations.CALCULATE(name).customers.CALCULATE(name)", "Unclear whether 'name' refers to a term of the current context or ancestor of collection TPCH.nations.CALCULATE(name=name).customers", - id="24", + id="downstream_1", ), pytest.param( - bad_pydough_impl_25, + "result = regions.CALCULATE(name).nations.customers.CALCULATE(name)", "Unclear whether 'name' refers to a term of the current context or ancestor of collection TPCH.regions.CALCULATE(name=name).nations.customers", - id="25", + id="downstream_2", ), pytest.param( - bad_pydough_impl_26, + "result = orders.PARTITION(name='priorities', by=order_priority).CALCULATE(key=COUNT(orders)).orders.CALCULATE(key)", "Unclear whether 'key' refers to a term of the current context or ancestor of collection TPCH.Partition(orders, name='priorities', by=order_priority).CALCULATE(key=COUNT(orders)).orders", - id="26", + id="downstream_3", ), pytest.param( - bad_pydough_impl_27, + "result = regions.CALCULATE(n1=name, n2=CROSS(regions).name)", "Expected all terms in CALCULATE(n1=name, n2=TPCH.regions.name) to be singular, but encountered a plural expression: TPCH.regions.name", - id="27", + id="plural_cross", ), pytest.param( - bad_replace_too_many_args, + "result = nations.CALCULATE(replace_name1=REPLACE(name, 'a', 'b', 'c'))", "Expected between 2 and 3 arguments inclusive, received 4", id="bad_replace_too_many_args", ), pytest.param( - bad_replace_few_args, + "result = nations.CALCULATE(replace_name2=REPLACE('a'))", "Expected between 2 and 3 arguments inclusive, received 1", id="bad_replace_few_args", ), pytest.param( - bad_str_count_too_many_args, + "result = nations.CALCULATE(str_count1=STRCOUNT(name, 'a', 'b'))", "Expected 2 arguments, received 3", id="bad_str_count_too_many_args", ), pytest.param( - bad_str_count_few_args, + "result = nations.CALCULATE(str_count2=STRCOUNT(name))", "Expected 2 arguments, 
received 1", id="bad_str_count_few_args", ), ], ) def test_qualify_error( - impl: Callable[[UnqualifiedNode], UnqualifiedNode], + pydough_text: str, error_msg: str, get_sample_graph: graph_fetcher, ) -> None: """ - Tests that strings representing the setup of PyDough unqualified objects - (with unknown variables already pre-pended with `_ROOT.`) are correctly - transformed into UnqualifiedNode objects with an expected string - representation. Each `pydough_str` should be called with `exec` to define - a variable `answer` that is an `UnqualifiedNode` instance. + Tests that the qualification process correctly raises the expected error + messages when the PyDough text is invalid. Takes in the PyDough text and + converts it to unqualified nodes with `from_string`, then qualifies it to + ensure that the error is raised as expected. The PyDough text can be 1 or + multiple lines, but must end with storing the answers in a variable + called `result`. """ graph: GraphMetadata = get_sample_graph("TPCH") - root: UnqualifiedNode = UnqualifiedRoot(graph) default_config: PyDoughConfigs = pydough.active_session.config with pytest.raises(Exception, match=re.escape(error_msg)): - unqualified: UnqualifiedNode = impl(root) + unqualified: UnqualifiedNode = pydough.from_string( + pydough_text, answer_variable="result", metadata=graph + ) qualify_node(unqualified, graph, default_config) From 993553f480c9bb571214a49e4dc9090582fe5b47 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 14:35:40 -0400 Subject: [PATCH 040/143] Moving more errors --- .../database_connectors/database_connector.py | 7 ++- pydough/errors/pydough_error_builder.py | 50 ++++++++++++++++++- pydough/sqlglot/execute_relational.py | 6 ++- .../sqlglot_relational_expression_visitor.py | 12 +++-- 4 files changed, 67 insertions(+), 8 deletions(-) diff --git a/pydough/database_connectors/database_connector.py b/pydough/database_connectors/database_connector.py index c9eb8bd7c..e16426b1e 100644 --- 
a/pydough/database_connectors/database_connector.py +++ b/pydough/database_connectors/database_connector.py @@ -11,7 +11,8 @@ import pandas as pd -from pydough.errors import PyDoughSessionException, PyDoughSQLException +import pydough +from pydough.errors import PyDoughSessionException from .db_types import DBConnection, DBCursor @@ -49,7 +50,9 @@ def execute_query_df(self, sql: str) -> pd.DataFrame: cursor.execute(sql) except Exception as e: print(f"ERROR WHILE EXECUTING QUERY:\n{sql}") - raise PyDoughSQLException(*e.args) from e + raise pydough.active_session.error_builder.sql_runtime_failure( + sql, e, True + ) from e column_names: list[str] = [description[0] for description in cursor.description] # No need to close the cursor, as its closed by del. # TODO: (gh #174) Cache the cursor? diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py index 33c093c4b..0a560b520 100644 --- a/pydough/errors/pydough_error_builder.py +++ b/pydough/errors/pydough_error_builder.py @@ -4,11 +4,12 @@ from typing import TYPE_CHECKING -from pydough.errors import PyDoughException, PyDoughQDAGException +from pydough.errors import PyDoughException, PyDoughQDAGException, PyDoughSQLException if TYPE_CHECKING: from pydough.pydough_operators import PyDoughOperator from pydough.qdag import PyDoughCollectionQDAG, PyDoughExpressionQDAG + from pydough.relational import CallExpression class PyDoughErrorBuilder: @@ -182,3 +183,50 @@ def bad_columns(self, columns: object) -> PyDoughException: return PyDoughQDAGException( f"Expected `columns` argument to be a list or dictionary, found {columns.__class__.__name__}" ) + + def sql_runtime_failure( + self, sql: str, error: Exception, execute: bool + ) -> PyDoughException: + """ + Creates an exception for when a SQL query fails to execute at runtime + or optimization. + + Args: + `sql`: The SQL query that failed. + `error`: The exception raised during the SQL execution or + optimization. 
+ `execute`: Whether the failure occurred during execution (True) or + optimization (False). + + Returns: + An exception indicating the SQL runtime/optimization failure. + """ + if execute: + return PyDoughSQLException( + "SQL query execution failed. Please check the query syntax and database connection:\n" + f"{sql}\nError: {error}" + ) + else: + return PyDoughSQLException( + "SQL query optimization failed. Please check the query syntax:\n" + f"{sql}\nError: {error}" + ) + + def sql_call_conversion_error( + self, call: "CallExpression", error: Exception + ) -> PyDoughException: + """ + Creates an exception for when the conversion of a call expression from + Relational to SQL fails. + + Args: + `call`: The relational function call expression that + failed to convert. + `error`: The exception raised during the conversion. + + Returns: + An exception indicating the SQL call conversion failure. + """ + return PyDoughQDAGException( + f"Failed to convert expression {call.to_string(True)} to SQL: {error}" + ) diff --git a/pydough/sqlglot/execute_relational.py b/pydough/sqlglot/execute_relational.py index a4d5801ac..281e5859a 100644 --- a/pydough/sqlglot/execute_relational.py +++ b/pydough/sqlglot/execute_relational.py @@ -23,12 +23,12 @@ from sqlglot.optimizer.simplify import simplify from sqlglot.optimizer.unnest_subqueries import unnest_subqueries +import pydough from pydough.configs import PyDoughConfigs from pydough.database_connectors import ( DatabaseContext, DatabaseDialect, ) -from pydough.errors import PyDoughSQLException from pydough.logger import get_logger from pydough.relational import RelationalRoot from pydough.relational.relational_expressions import ( @@ -69,7 +69,9 @@ def convert_relation_to_sql( except SqlglotError as e: sql_text: str = glot_expr.sql(sqlglot_dialect, pretty=True) print(f"ERROR WHILE OPTIMIZING QUERY:\n{sql_text}") - raise PyDoughSQLException(*e.args) + raise pydough.active_session.error_builder.sql_runtime_failure( + sql_text, e, False 
+ ) from e # Convert the optimized AST back to a SQL string. return glot_expr.sql(sqlglot_dialect, pretty=True) diff --git a/pydough/sqlglot/sqlglot_relational_expression_visitor.py b/pydough/sqlglot/sqlglot_relational_expression_visitor.py index da8c8b09d..343ed6ab7 100644 --- a/pydough/sqlglot/sqlglot_relational_expression_visitor.py +++ b/pydough/sqlglot/sqlglot_relational_expression_visitor.py @@ -14,6 +14,7 @@ from sqlglot.expressions import Identifier from sqlglot.expressions import Star as SQLGlotStar +import pydough import pydough.pydough_operators as pydop from pydough.configs import PyDoughConfigs from pydough.database_connectors import DatabaseDialect @@ -76,9 +77,14 @@ def visit_call_expression(self, call_expression: CallExpression) -> None: input_types: list[PyDoughType] = [ arg.data_type for arg in call_expression.inputs ] - output_expr: SQLGlotExpression = self._bindings.convert_call_to_sqlglot( - call_expression.op, input_exprs, input_types - ) + try: + output_expr: SQLGlotExpression = self._bindings.convert_call_to_sqlglot( + call_expression.op, input_exprs, input_types + ) + except Exception as e: + raise pydough.active_session.error_builder.sql_call_conversion_error( + call_expression, e + ) self._stack.append(output_expr) @staticmethod From 85e7c8fd57ec106f4f4ee0686eb753da4c73bc82 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 18:25:55 -0400 Subject: [PATCH 041/143] Minor adjustment to how pagerank was written --- tests/test_plan_refsols/pagerank_a0.txt | 10 +-- tests/test_plan_refsols/pagerank_a1.txt | 16 ++--- tests/test_plan_refsols/pagerank_a2.txt | 24 +++---- tests/test_plan_refsols/pagerank_a6.txt | 56 +++++++-------- tests/test_plan_refsols/pagerank_b3.txt | 32 ++++----- tests/test_plan_refsols/pagerank_c4.txt | 40 +++++------ tests/test_plan_refsols/pagerank_d5.txt | 48 ++++++------- tests/test_plan_refsols/pagerank_h8.txt | 72 +++++++++---------- .../simple_pydough_functions.py | 9 ++- 
tests/test_sql_refsols/pagerank_a0_sqlite.sql | 16 ++--- tests/test_sql_refsols/pagerank_a1_sqlite.sql | 11 ++- tests/test_sql_refsols/pagerank_a2_sqlite.sql | 17 ++--- tests/test_sql_refsols/pagerank_a6_sqlite.sql | 41 ++++------- tests/test_sql_refsols/pagerank_b3_sqlite.sql | 23 +++--- tests/test_sql_refsols/pagerank_c4_sqlite.sql | 29 +++----- tests/test_sql_refsols/pagerank_d5_sqlite.sql | 35 ++++----- tests/test_sql_refsols/pagerank_h8_sqlite.sql | 53 +++++--------- 17 files changed, 238 insertions(+), 294 deletions(-) diff --git a/tests/test_plan_refsols/pagerank_a0.txt b/tests/test_plan_refsols/pagerank_a0.txt index 89355a8da..d7e58adbe 100644 --- a/tests/test_plan_refsols/pagerank_a0.txt +++ b/tests/test_plan_refsols/pagerank_a0.txt @@ -1,7 +1,3 @@ -ROOT(columns=[('key', anything_s_key), ('page_rank', page_rank_0)], orderings=[(anything_s_key):asc_first]) - PROJECT(columns={'anything_s_key': anything_s_key, 'page_rank_0': ROUND(1.0:numeric / anything_agg_2, 5:numeric)}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_agg_2': ANYTHING(agg_2), 'anything_s_key': ANYTHING(s_key)}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_2': t0.agg_2, 's_key': t0.s_key}) - PROJECT(columns={'agg_2': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source}) +ROOT(columns=[('key', s_key), ('page_rank', page_rank_0)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_0': ROUND(1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 5:numeric), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt index b3053e78b..5d72b1ec3 100644 --- a/tests/test_plan_refsols/pagerank_a1.txt +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -1,14 +1,14 @@ ROOT(columns=[('key', s_key), ('page_rank', 
page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 
'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt index 16646f254..e2deffe86 100644 --- a/tests/test_plan_refsols/pagerank_a2.txt +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -1,18 +1,18 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_20, 5:numeric), 's_key': 
s_key}) FILTER(condition=dummy_link_18, columns={'page_rank_0_20': page_rank_0_20, 's_key': s_key}) - PROJECT(columns={'dummy_link_18': dummy_link_18, 'page_rank_0_20': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_19 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_19': t0.consider_link_19, 'dummy_link_18': t0.dummy_link_18, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_19': t1.consider_link_19, 'dummy_link_18': t1.dummy_link_18, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': 
s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_18': dummy_link_18, 'page_rank_0_20': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_19 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_19': t0.consider_link_19, 'damp_modifier': t0.damp_modifier, 'dummy_link_18': t0.dummy_link_18, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_19': t1.consider_link_19, 'damp_modifier': t0.damp_modifier, 'dummy_link_18': t1.dummy_link_18, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt index f29c64b32..3bb61fe4e 100644 --- a/tests/test_plan_refsols/pagerank_a6.txt +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -1,34 +1,34 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_590, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link_588, columns={'page_rank_0_590': page_rank_0_590, 's_key': s_key}) - PROJECT(columns={'dummy_link_588': dummy_link_588, 'page_rank_0_590': 
0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_589 * page_rank_0_580 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_589': t0.consider_link_589, 'dummy_link_588': t0.dummy_link_588, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_589': t1.consider_link_589, 'dummy_link_588': t1.dummy_link_588, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580}) - FILTER(condition=dummy_link_578, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_580': page_rank_0_580, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_578': dummy_link_578, 'n_out': n_out, 'page_rank_0_580': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_579 * page_rank_0_570 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_579': t0.consider_link_579, 'dummy_link_578': t0.dummy_link_578, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_579': t1.consider_link_579, 'dummy_link_578': t1.dummy_link_578, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570}) - FILTER(condition=dummy_link_568, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_570': page_rank_0_570, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_568': dummy_link_568, 'n_out': n_out, 'page_rank_0_570': 
0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_569 * page_rank_0_560 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_569': t0.consider_link_569, 'dummy_link_568': t0.dummy_link_568, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_569': t1.consider_link_569, 'dummy_link_568': t1.dummy_link_568, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560}) - FILTER(condition=dummy_link_558, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_560': page_rank_0_560, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_558': dummy_link_558, 'n_out': n_out, 'page_rank_0_560': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_559 * page_rank_0_550 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_559': t0.consider_link_559, 'dummy_link_558': t0.dummy_link_558, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_559': t1.consider_link_559, 'dummy_link_558': t1.dummy_link_558, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550}) - FILTER(condition=dummy_link_548, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_550': page_rank_0_550, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_548': dummy_link_548, 'n_out': n_out, 'page_rank_0_550': 
0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_549 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_549': t0.consider_link_549, 'dummy_link_548': t0.dummy_link_548, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_549': t1.consider_link_549, 'dummy_link_548': t1.dummy_link_548, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': 
IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_588': dummy_link_588, 'page_rank_0_590': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_589 * page_rank_0_580 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_589': t0.consider_link_589, 'damp_modifier': t0.damp_modifier, 'dummy_link_588': t0.dummy_link_588, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_589': t1.consider_link_589, 'damp_modifier': t0.damp_modifier, 'dummy_link_588': t1.dummy_link_588, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580}) + FILTER(condition=dummy_link_578, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_580': page_rank_0_580, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_578': dummy_link_578, 'n_out': n_out, 'page_rank_0_580': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_579 * page_rank_0_570 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_579': t0.consider_link_579, 'damp_modifier': t0.damp_modifier, 'dummy_link_578': t0.dummy_link_578, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_579': t1.consider_link_579, 'damp_modifier': 
t0.damp_modifier, 'dummy_link_578': t1.dummy_link_578, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570}) + FILTER(condition=dummy_link_568, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_570': page_rank_0_570, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_568': dummy_link_568, 'n_out': n_out, 'page_rank_0_570': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_569 * page_rank_0_560 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_569': t0.consider_link_569, 'damp_modifier': t0.damp_modifier, 'dummy_link_568': t0.dummy_link_568, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_569': t1.consider_link_569, 'damp_modifier': t0.damp_modifier, 'dummy_link_568': t1.dummy_link_568, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560}) + FILTER(condition=dummy_link_558, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_560': page_rank_0_560, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_558': dummy_link_558, 'n_out': n_out, 'page_rank_0_560': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_559 * page_rank_0_550 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_559': t0.consider_link_559, 'damp_modifier': t0.damp_modifier, 'dummy_link_558': t0.dummy_link_558, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_559': t1.consider_link_559, 'damp_modifier': 
t0.damp_modifier, 'dummy_link_558': t1.dummy_link_558, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550}) + FILTER(condition=dummy_link_548, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_550': page_rank_0_550, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_548': dummy_link_548, 'n_out': n_out, 'page_rank_0_550': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_549 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_549': t0.consider_link_549, 'damp_modifier': t0.damp_modifier, 'dummy_link_548': t0.dummy_link_548, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_549': t1.consider_link_549, 'damp_modifier': t0.damp_modifier, 'dummy_link_548': t1.dummy_link_548, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': 
t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt index d78322a4d..0b43e4e96 100644 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ b/tests/test_plan_refsols/pagerank_b3.txt @@ -1,22 +1,22 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_58, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link_56, columns={'page_rank_0_58': page_rank_0_58, 's_key': s_key}) - PROJECT(columns={'dummy_link_56': dummy_link_56, 'page_rank_0_58': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_57 * page_rank_0_48 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t0.consider_link_57, 'dummy_link_56': t0.dummy_link_56, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t1.consider_link_57, 'dummy_link_56': t1.dummy_link_56, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48}) - FILTER(condition=dummy_link_46, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_48': page_rank_0_48, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_46': dummy_link_46, 'n_out': n_out, 'page_rank_0_48': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_47 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t0.consider_link_47, 'dummy_link_46': t0.dummy_link_46, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t1.consider_link_47, 'dummy_link_46': t1.dummy_link_46, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_56': dummy_link_56, 'page_rank_0_58': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_57 * page_rank_0_48 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_57': t0.consider_link_57, 'damp_modifier': t0.damp_modifier, 'dummy_link_56': t0.dummy_link_56, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_57': t1.consider_link_57, 'damp_modifier': t0.damp_modifier, 'dummy_link_56': t1.dummy_link_56, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48}) + FILTER(condition=dummy_link_46, columns={'damp_modifier': damp_modifier, 
'n_out': n_out, 'page_rank_0_48': page_rank_0_48, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_46': dummy_link_46, 'n_out': n_out, 'page_rank_0_48': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_47 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_47': t0.consider_link_47, 'damp_modifier': t0.damp_modifier, 'dummy_link_46': t0.dummy_link_46, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_47': t1.consider_link_47, 'damp_modifier': t0.damp_modifier, 'dummy_link_46': t1.dummy_link_46, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': 
DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt index 1c504e853..d8f77fd16 100644 --- a/tests/test_plan_refsols/pagerank_c4.txt +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -1,26 +1,26 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_134, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link_132, columns={'page_rank_0_134': page_rank_0_134, 's_key': s_key}) - PROJECT(columns={'dummy_link_132': dummy_link_132, 'page_rank_0_134': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_133 * page_rank_0_124 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_133': t0.consider_link_133, 'dummy_link_132': t0.dummy_link_132, 'n_out': 
t0.n_out, 'page_rank_0_124': t0.page_rank_0_124, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_133': t1.consider_link_133, 'dummy_link_132': t1.dummy_link_132, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_124': t0.page_rank_0_124}) - FILTER(condition=dummy_link_122, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_124': page_rank_0_124, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_122': dummy_link_122, 'n_out': n_out, 'page_rank_0_124': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_123 * page_rank_0_114 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_123': t0.consider_link_123, 'dummy_link_122': t0.dummy_link_122, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_123': t1.consider_link_123, 'dummy_link_122': t1.dummy_link_122, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114}) - FILTER(condition=dummy_link_112, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_114': page_rank_0_114, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_112': dummy_link_112, 'n_out': n_out, 'page_rank_0_114': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_113 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_113': t0.consider_link_113, 'dummy_link_112': t0.dummy_link_112, 'n_out': 
t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_113': t1.consider_link_113, 'dummy_link_112': t1.dummy_link_112, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_132': dummy_link_132, 
'page_rank_0_134': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_133 * page_rank_0_124 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_133': t0.consider_link_133, 'damp_modifier': t0.damp_modifier, 'dummy_link_132': t0.dummy_link_132, 'n_out': t0.n_out, 'page_rank_0_124': t0.page_rank_0_124, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_133': t1.consider_link_133, 'damp_modifier': t0.damp_modifier, 'dummy_link_132': t1.dummy_link_132, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_124': t0.page_rank_0_124}) + FILTER(condition=dummy_link_122, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_124': page_rank_0_124, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_122': dummy_link_122, 'n_out': n_out, 'page_rank_0_124': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_123 * page_rank_0_114 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_123': t0.consider_link_123, 'damp_modifier': t0.damp_modifier, 'dummy_link_122': t0.dummy_link_122, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_123': t1.consider_link_123, 'damp_modifier': t0.damp_modifier, 'dummy_link_122': t1.dummy_link_122, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114}) + FILTER(condition=dummy_link_112, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_114': page_rank_0_114, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_112': dummy_link_112, 'n_out': n_out, 
'page_rank_0_114': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_113 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_113': t0.consider_link_113, 'damp_modifier': t0.damp_modifier, 'dummy_link_112': t0.dummy_link_112, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_113': t1.consider_link_113, 'damp_modifier': t0.damp_modifier, 'dummy_link_112': t1.dummy_link_112, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 
'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt index fb3b3c239..32c783fd9 100644 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -1,30 +1,30 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_286, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link_284, columns={'page_rank_0_286': page_rank_0_286, 's_key': s_key}) - PROJECT(columns={'dummy_link_284': dummy_link_284, 'page_rank_0_286': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_285 * page_rank_0_276 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t0.consider_link_285, 'dummy_link_284': t0.dummy_link_284, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'anything_n': t0.anything_n, 'consider_link_285': t1.consider_link_285, 'dummy_link_284': t1.dummy_link_284, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276}) - FILTER(condition=dummy_link_274, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_276': page_rank_0_276, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_274': dummy_link_274, 'n_out': n_out, 'page_rank_0_276': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_275 * page_rank_0_266 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t0.consider_link_275, 'dummy_link_274': t0.dummy_link_274, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t1.consider_link_275, 'dummy_link_274': t1.dummy_link_274, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266}) - FILTER(condition=dummy_link_264, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_266': page_rank_0_266, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_264': dummy_link_264, 'n_out': n_out, 'page_rank_0_266': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_265 * page_rank_0_256 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t0.consider_link_265, 'dummy_link_264': t0.dummy_link_264, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'anything_n': t0.anything_n, 'consider_link_265': t1.consider_link_265, 'dummy_link_264': t1.dummy_link_264, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256}) - FILTER(condition=dummy_link_254, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_256': page_rank_0_256, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_254': dummy_link_254, 'n_out': n_out, 'page_rank_0_256': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_255 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t0.consider_link_255, 'dummy_link_254': t0.dummy_link_254, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t1.consider_link_255, 'dummy_link_254': t1.dummy_link_254, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 
'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_284': dummy_link_284, 'page_rank_0_286': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_285 * page_rank_0_276 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_285': t0.consider_link_285, 'damp_modifier': t0.damp_modifier, 'dummy_link_284': t0.dummy_link_284, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_285': t1.consider_link_285, 'damp_modifier': t0.damp_modifier, 'dummy_link_284': t1.dummy_link_284, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276}) + FILTER(condition=dummy_link_274, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_276': page_rank_0_276, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_274': dummy_link_274, 'n_out': n_out, 'page_rank_0_276': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_275 * page_rank_0_266 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_275': t0.consider_link_275, 'damp_modifier': t0.damp_modifier, 'dummy_link_274': t0.dummy_link_274, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_275': t1.consider_link_275, 'damp_modifier': t0.damp_modifier, 'dummy_link_274': t1.dummy_link_274, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266}) + FILTER(condition=dummy_link_264, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_266': page_rank_0_266, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_264': dummy_link_264, 'n_out': n_out, 'page_rank_0_266': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_265 * page_rank_0_256 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_265': t0.consider_link_265, 'damp_modifier': t0.damp_modifier, 'dummy_link_264': t0.dummy_link_264, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_265': t1.consider_link_265, 'damp_modifier': t0.damp_modifier, 'dummy_link_264': t1.dummy_link_264, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256}) + FILTER(condition=dummy_link_254, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_256': page_rank_0_256, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_254': dummy_link_254, 'n_out': n_out, 'page_rank_0_256': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_255 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_255': t0.consider_link_255, 'damp_modifier': t0.damp_modifier, 'dummy_link_254': t0.dummy_link_254, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_255': t1.consider_link_255, 'damp_modifier': t0.damp_modifier, 'dummy_link_254': t1.dummy_link_254, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != 
l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_h8.txt b/tests/test_plan_refsols/pagerank_h8.txt index 045977af7..e33c88596 100644 --- a/tests/test_plan_refsols/pagerank_h8.txt +++ b/tests/test_plan_refsols/pagerank_h8.txt @@ -1,42 +1,42 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_2414, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link_2412, columns={'page_rank_0_2414': page_rank_0_2414, 's_key': s_key}) - PROJECT(columns={'dummy_link_2412': dummy_link_2412, 'page_rank_0_2414': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2413 * page_rank_0_2404 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2413': t0.consider_link_2413, 'dummy_link_2412': t0.dummy_link_2412, 'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2413': t1.consider_link_2413, 'dummy_link_2412': t1.dummy_link_2412, 'l_target': t1.l_target, 
'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404}) - FILTER(condition=dummy_link_2402, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2404': page_rank_0_2404, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2402': dummy_link_2402, 'n_out': n_out, 'page_rank_0_2404': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2403 * page_rank_0_2394 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2403': t0.consider_link_2403, 'dummy_link_2402': t0.dummy_link_2402, 'n_out': t0.n_out, 'page_rank_0_2394': t0.page_rank_0_2394, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2403': t1.consider_link_2403, 'dummy_link_2402': t1.dummy_link_2402, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2394': t0.page_rank_0_2394}) - FILTER(condition=dummy_link_2392, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2394': page_rank_0_2394, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2392': dummy_link_2392, 'n_out': n_out, 'page_rank_0_2394': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2393 * page_rank_0_2384 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2393': t0.consider_link_2393, 'dummy_link_2392': t0.dummy_link_2392, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2393': t1.consider_link_2393, 'dummy_link_2392': 
t1.dummy_link_2392, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384}) - FILTER(condition=dummy_link_2382, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2384': page_rank_0_2384, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2382': dummy_link_2382, 'n_out': n_out, 'page_rank_0_2384': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2383 * page_rank_0_2374 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2383': t0.consider_link_2383, 'dummy_link_2382': t0.dummy_link_2382, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2383': t1.consider_link_2383, 'dummy_link_2382': t1.dummy_link_2382, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374}) - FILTER(condition=dummy_link_2372, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2374': page_rank_0_2374, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2372': dummy_link_2372, 'n_out': n_out, 'page_rank_0_2374': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2373 * page_rank_0_2364 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2373': t0.consider_link_2373, 'dummy_link_2372': t0.dummy_link_2372, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2373': 
t1.consider_link_2373, 'dummy_link_2372': t1.dummy_link_2372, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364}) - FILTER(condition=dummy_link_2362, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2364': page_rank_0_2364, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2362': dummy_link_2362, 'n_out': n_out, 'page_rank_0_2364': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2363 * page_rank_0_2354 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2363': t0.consider_link_2363, 'dummy_link_2362': t0.dummy_link_2362, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2363': t1.consider_link_2363, 'dummy_link_2362': t1.dummy_link_2362, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354}) - FILTER(condition=dummy_link_2352, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2354': page_rank_0_2354, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2352': dummy_link_2352, 'n_out': n_out, 'page_rank_0_2354': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2353 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2353': t0.consider_link_2353, 'dummy_link_2352': t0.dummy_link_2352, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': 
t0.anything_n, 'consider_link_2353': t1.consider_link_2353, 'dummy_link_2352': t1.dummy_link_2352, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_2412': dummy_link_2412, 'page_rank_0_2414': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2413 * page_rank_0_2404 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2413': t0.consider_link_2413, 'damp_modifier': t0.damp_modifier, 'dummy_link_2412': t0.dummy_link_2412, 'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2413': t1.consider_link_2413, 'damp_modifier': t0.damp_modifier, 'dummy_link_2412': t1.dummy_link_2412, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404}) + FILTER(condition=dummy_link_2402, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2404': page_rank_0_2404, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2402': dummy_link_2402, 'n_out': n_out, 'page_rank_0_2404': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2403 * page_rank_0_2394 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2403': t0.consider_link_2403, 'damp_modifier': t0.damp_modifier, 'dummy_link_2402': t0.dummy_link_2402, 'n_out': t0.n_out, 'page_rank_0_2394': t0.page_rank_0_2394, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2403': t1.consider_link_2403, 'damp_modifier': t0.damp_modifier, 'dummy_link_2402': t1.dummy_link_2402, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2394': t0.page_rank_0_2394}) + FILTER(condition=dummy_link_2392, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2394': page_rank_0_2394, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2392': dummy_link_2392, 'n_out': n_out, 'page_rank_0_2394': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2393 * page_rank_0_2384 / n_out], 
partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2393': t0.consider_link_2393, 'damp_modifier': t0.damp_modifier, 'dummy_link_2392': t0.dummy_link_2392, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2393': t1.consider_link_2393, 'damp_modifier': t0.damp_modifier, 'dummy_link_2392': t1.dummy_link_2392, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384}) + FILTER(condition=dummy_link_2382, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2384': page_rank_0_2384, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2382': dummy_link_2382, 'n_out': n_out, 'page_rank_0_2384': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2383 * page_rank_0_2374 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2383': t0.consider_link_2383, 'damp_modifier': t0.damp_modifier, 'dummy_link_2382': t0.dummy_link_2382, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2383': t1.consider_link_2383, 'damp_modifier': t0.damp_modifier, 'dummy_link_2382': t1.dummy_link_2382, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374}) + FILTER(condition=dummy_link_2372, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2374': page_rank_0_2374, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2372': dummy_link_2372, 'n_out': n_out, 'page_rank_0_2374': damp_modifier + 0.85:numeric * 
RELSUM(args=[consider_link_2373 * page_rank_0_2364 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2373': t0.consider_link_2373, 'damp_modifier': t0.damp_modifier, 'dummy_link_2372': t0.dummy_link_2372, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2373': t1.consider_link_2373, 'damp_modifier': t0.damp_modifier, 'dummy_link_2372': t1.dummy_link_2372, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364}) + FILTER(condition=dummy_link_2362, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2364': page_rank_0_2364, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2362': dummy_link_2362, 'n_out': n_out, 'page_rank_0_2364': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2363 * page_rank_0_2354 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2363': t0.consider_link_2363, 'damp_modifier': t0.damp_modifier, 'dummy_link_2362': t0.dummy_link_2362, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2363': t1.consider_link_2363, 'damp_modifier': t0.damp_modifier, 'dummy_link_2362': t1.dummy_link_2362, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354}) + FILTER(condition=dummy_link_2352, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2354': page_rank_0_2354, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2352': dummy_link_2352, 'n_out': n_out, 
'page_rank_0_2354': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2353 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2353': t0.consider_link_2353, 'damp_modifier': t0.damp_modifier, 'dummy_link_2352': t0.dummy_link_2352, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2353': t1.consider_link_2353, 'damp_modifier': t0.damp_modifier, 'dummy_link_2352': t1.dummy_link_2352, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': 
ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 5fd4aad86..6ed2db9b9 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -3064,7 +3064,10 @@ def pagerank(n_iters): # The seed value for the PageRank computation, which is evenly distributed. # Also computes the number of sites in the graph & the number of sites each # site links to, which are both used downstream. - source = sites.CALCULATE(n=RELSIZE()).CALCULATE(page_rank=1.0 / n, n_out=n_out_expr) + source = sites.CALCULATE(n=RELSIZE()).CALCULATE(page_rank=1.0 / n) + + if n_iters > 0: + source = source.CALCULATE(n_out=n_out_expr, damp_modifier=0.15 / n) # Repeats the following procedure for n_iters iterations to build the next # generation of PageRank values from the current generation. 
@@ -3084,9 +3087,9 @@ def pagerank(n_iters): ) .target_site.PARTITION(name=f"s{i}", by=key) .target_site.CALCULATE( - n, + damp_modifier, n_out, - page_rank=(1.0 - d) / n + page_rank=damp_modifier + d * RELSUM(consider_link * page_rank / n_out, per=f"s{i}"), ) .WHERE(dummy_link) diff --git a/tests/test_sql_refsols/pagerank_a0_sqlite.sql b/tests/test_sql_refsols/pagerank_a0_sqlite.sql index 4fe8f22be..866d2c84f 100644 --- a/tests/test_sql_refsols/pagerank_a0_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a0_sqlite.sql @@ -1,16 +1,12 @@ -WITH _s0 AS ( +WITH _t0 AS ( SELECT - COUNT(*) OVER () AS agg_2, + ROUND(CAST(1.0 AS REAL) / COUNT(*) OVER (), 5) AS page_rank_0, s_key FROM main.sites ) SELECT - MAX(_s0.s_key) AS key, - ROUND(CAST(1.0 AS REAL) / MAX(_s0.agg_2), 5) AS page_rank -FROM _s0 AS _s0 -JOIN main.links AS links - ON _s0.s_key = links.l_source -GROUP BY - _s0.s_key + s_key AS key, + page_rank_0 AS page_rank +FROM _t0 ORDER BY - MAX(_s0.s_key) + s_key diff --git a/tests/test_sql_refsols/pagerank_a1_sqlite.sql b/tests/test_sql_refsols/pagerank_a1_sqlite.sql index d7965056f..c590ae2af 100644 --- a/tests/test_sql_refsols/pagerank_a1_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a1_sqlite.sql @@ -5,6 +5,7 @@ WITH _t7 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t7 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t7 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,11 +29,9 @@ WITH _t7 AS ( _s0.s_key ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t8.l_source <> _t8.l_target 
OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, diff --git a/tests/test_sql_refsols/pagerank_a2_sqlite.sql b/tests/test_sql_refsols/pagerank_a2_sqlite.sql index 0b90f0fd8..991a214e5 100644 --- a/tests/test_sql_refsols/pagerank_a2_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a2_sqlite.sql @@ -5,6 +5,7 @@ WITH _t9 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t9 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t9 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t9 AS ( _s0.s_key ), _t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t10.l_source <> _t10.l_target OR _t10.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t10.l_source <> _t10.l_target OR _t10.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t10.l_target IS NULL AND _t10.l_source = _t10.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,9 +45,7 @@ WITH _t9 AS ( ON _s5.s_key = _t10.l_target OR _t10.l_target IS NULL ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t11.l_source <> _t11.l_target OR _t11.l_target IS NULL AS INTEGER) * _t4.page_rank_0 ) AS 
REAL) / _t4.n_out diff --git a/tests/test_sql_refsols/pagerank_a6_sqlite.sql b/tests/test_sql_refsols/pagerank_a6_sqlite.sql index 24a887003..1eb55bf92 100644 --- a/tests/test_sql_refsols/pagerank_a6_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a6_sqlite.sql @@ -5,6 +5,7 @@ WITH _t17 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t17 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t17 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t17 AS ( _s0.s_key ), _t12 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,14 +45,12 @@ WITH _t17 AS ( ON _s5.s_key = _t18.l_target OR _t18.l_target IS NULL ), _t10 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t12.anything_n - ) + 0.85 * SUM( + _t12.damp_modifier + 0.85 * SUM( CAST(( CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t12.page_rank_0 ) AS REAL) / _t12.n_out ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_550, - _t12.anything_n, + _t12.damp_modifier, NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_548, _t12.n_out, _s9.s_key @@ -66,14 +63,12 @@ WITH _t17 AS ( _t12.dummy_link 
), _t8 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t10.anything_n - ) + 0.85 * SUM( + _t10.damp_modifier + 0.85 * SUM( CAST(( CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t10.page_rank_0_550 ) AS REAL) / _t10.n_out ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_560, - _t10.anything_n, + _t10.damp_modifier, NOT _t20.l_target IS NULL AND _t20.l_source = _t20.l_target AS dummy_link_558, _t10.n_out, _s13.s_key @@ -86,14 +81,12 @@ WITH _t17 AS ( _t10.dummy_link_548 ), _t6 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t8.anything_n - ) + 0.85 * SUM( + _t8.damp_modifier + 0.85 * SUM( CAST(( CAST(_t21.l_source <> _t21.l_target OR _t21.l_target IS NULL AS INTEGER) * _t8.page_rank_0_560 ) AS REAL) / _t8.n_out ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_570, - _t8.anything_n, + _t8.damp_modifier, NOT _t21.l_target IS NULL AND _t21.l_source = _t21.l_target AS dummy_link_568, _t8.n_out, _s17.s_key @@ -106,14 +99,12 @@ WITH _t17 AS ( _t8.dummy_link_558 ), _t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( + _t6.damp_modifier + 0.85 * SUM( CAST(( CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _t6.page_rank_0_570 ) AS REAL) / _t6.n_out ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_580, - _t6.anything_n, + _t6.damp_modifier, NOT _t22.l_target IS NULL AND _t22.l_source = _t22.l_target AS dummy_link_578, _t6.n_out, _s21.s_key @@ -126,9 +117,7 @@ WITH _t17 AS ( _t6.dummy_link_568 ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _t4.page_rank_0_580 ) AS REAL) / _t4.n_out diff --git a/tests/test_sql_refsols/pagerank_b3_sqlite.sql b/tests/test_sql_refsols/pagerank_b3_sqlite.sql index 40404439e..324265a0b 100644 --- a/tests/test_sql_refsols/pagerank_b3_sqlite.sql +++ 
b/tests/test_sql_refsols/pagerank_b3_sqlite.sql @@ -5,6 +5,7 @@ WITH _t11 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t11 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t11 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t11 AS ( _s0.s_key ), _t6 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t12.l_source <> _t12.l_target OR _t12.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t12.l_source <> _t12.l_target OR _t12.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t12.l_target IS NULL AND _t12.l_source = _t12.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,14 +45,12 @@ WITH _t11 AS ( ON _s5.s_key = _t12.l_target OR _t12.l_target IS NULL ), _t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( + _t6.damp_modifier + 0.85 * SUM( CAST(( CAST(_t13.l_source <> _t13.l_target OR _t13.l_target IS NULL AS INTEGER) * _t6.page_rank_0 ) AS REAL) / _t6.n_out ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_48, - _t6.anything_n, + _t6.damp_modifier, NOT _t13.l_target IS NULL AND _t13.l_source = _t13.l_target AS dummy_link_46, _t6.n_out, _s9.s_key @@ -66,9 +63,7 @@ WITH _t11 AS ( _t6.dummy_link ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _t4.page_rank_0_48 
) AS REAL) / _t4.n_out diff --git a/tests/test_sql_refsols/pagerank_c4_sqlite.sql b/tests/test_sql_refsols/pagerank_c4_sqlite.sql index e0c215a16..8dc1fb5dd 100644 --- a/tests/test_sql_refsols/pagerank_c4_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_c4_sqlite.sql @@ -5,6 +5,7 @@ WITH _t13 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t13 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t13 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t13 AS ( _s0.s_key ), _t8 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t14.l_target IS NULL AND _t14.l_source = _t14.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,14 +45,12 @@ WITH _t13 AS ( ON _s5.s_key = _t14.l_target OR _t14.l_target IS NULL ), _t6 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t8.anything_n - ) + 0.85 * SUM( + _t8.damp_modifier + 0.85 * SUM( CAST(( CAST(_t15.l_source <> _t15.l_target OR _t15.l_target IS NULL AS INTEGER) * _t8.page_rank_0 ) AS REAL) / _t8.n_out ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_114, - _t8.anything_n, + _t8.damp_modifier, NOT _t15.l_target IS NULL AND _t15.l_source = _t15.l_target AS dummy_link_112, _t8.n_out, _s9.s_key @@ -66,14 +63,12 @@ WITH _t13 AS ( _t8.dummy_link ), 
_t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( + _t6.damp_modifier + 0.85 * SUM( CAST(( CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _t6.page_rank_0_114 ) AS REAL) / _t6.n_out ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_124, - _t6.anything_n, + _t6.damp_modifier, NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link_122, _t6.n_out, _s13.s_key @@ -86,9 +81,7 @@ WITH _t13 AS ( _t6.dummy_link_112 ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t4.page_rank_0_124 ) AS REAL) / _t4.n_out diff --git a/tests/test_sql_refsols/pagerank_d5_sqlite.sql b/tests/test_sql_refsols/pagerank_d5_sqlite.sql index f6f5cb16e..df1187f10 100644 --- a/tests/test_sql_refsols/pagerank_d5_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_d5_sqlite.sql @@ -5,6 +5,7 @@ WITH _t15 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t15 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t15 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t15 AS ( _s0.s_key ), _t10 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS 
page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,14 +45,12 @@ WITH _t15 AS ( ON _s5.s_key = _t16.l_target OR _t16.l_target IS NULL ), _t8 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t10.anything_n - ) + 0.85 * SUM( + _t10.damp_modifier + 0.85 * SUM( CAST(( CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t10.page_rank_0 ) AS REAL) / _t10.n_out ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_256, - _t10.anything_n, + _t10.damp_modifier, NOT _t17.l_target IS NULL AND _t17.l_source = _t17.l_target AS dummy_link_254, _t10.n_out, _s9.s_key @@ -66,14 +63,12 @@ WITH _t15 AS ( _t10.dummy_link ), _t6 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t8.anything_n - ) + 0.85 * SUM( + _t8.damp_modifier + 0.85 * SUM( CAST(( CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _t8.page_rank_0_256 ) AS REAL) / _t8.n_out ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_266, - _t8.anything_n, + _t8.damp_modifier, NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link_264, _t8.n_out, _s13.s_key @@ -86,14 +81,12 @@ WITH _t15 AS ( _t8.dummy_link_254 ), _t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( + _t6.damp_modifier + 0.85 * SUM( CAST(( CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t6.page_rank_0_266 ) AS REAL) / _t6.n_out ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_276, - _t6.anything_n, + _t6.damp_modifier, NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_274, _t6.n_out, _s17.s_key @@ -106,9 +99,7 @@ WITH _t15 AS ( _t6.dummy_link_264 ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t4.page_rank_0_276 ) AS REAL) / _t4.n_out diff --git 
a/tests/test_sql_refsols/pagerank_h8_sqlite.sql b/tests/test_sql_refsols/pagerank_h8_sqlite.sql index 017bc5921..f603efd8d 100644 --- a/tests/test_sql_refsols/pagerank_h8_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_h8_sqlite.sql @@ -5,6 +5,7 @@ WITH _t21 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t21 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t21 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t21 AS ( _s0.s_key ), _t16 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t22.l_target IS NULL AND _t22.l_source = _t22.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,14 +45,12 @@ WITH _t21 AS ( ON _s5.s_key = _t22.l_target OR _t22.l_target IS NULL ), _t14 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t16.anything_n - ) + 0.85 * SUM( + _t16.damp_modifier + 0.85 * SUM( CAST(( CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _t16.page_rank_0 ) AS REAL) / _t16.n_out ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_2354, - _t16.anything_n, + _t16.damp_modifier, NOT _t23.l_target IS NULL AND _t23.l_source = _t23.l_target AS dummy_link_2352, _t16.n_out, _s9.s_key @@ -66,14 +63,12 @@ WITH _t21 AS ( _t16.dummy_link ), _t12 AS ( SELECT - ( - 
CAST(0.15000000000000002 AS REAL) / _t14.anything_n - ) + 0.85 * SUM( + _t14.damp_modifier + 0.85 * SUM( CAST(( CAST(_t24.l_source <> _t24.l_target OR _t24.l_target IS NULL AS INTEGER) * _t14.page_rank_0_2354 ) AS REAL) / _t14.n_out ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_2364, - _t14.anything_n, + _t14.damp_modifier, NOT _t24.l_target IS NULL AND _t24.l_source = _t24.l_target AS dummy_link_2362, _t14.n_out, _s13.s_key @@ -86,14 +81,12 @@ WITH _t21 AS ( _t14.dummy_link_2352 ), _t10 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t12.anything_n - ) + 0.85 * SUM( + _t12.damp_modifier + 0.85 * SUM( CAST(( CAST(_t25.l_source <> _t25.l_target OR _t25.l_target IS NULL AS INTEGER) * _t12.page_rank_0_2364 ) AS REAL) / _t12.n_out ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_2374, - _t12.anything_n, + _t12.damp_modifier, NOT _t25.l_target IS NULL AND _t25.l_source = _t25.l_target AS dummy_link_2372, _t12.n_out, _s17.s_key @@ -106,14 +99,12 @@ WITH _t21 AS ( _t12.dummy_link_2362 ), _t8 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t10.anything_n - ) + 0.85 * SUM( + _t10.damp_modifier + 0.85 * SUM( CAST(( CAST(_t26.l_source <> _t26.l_target OR _t26.l_target IS NULL AS INTEGER) * _t10.page_rank_0_2374 ) AS REAL) / _t10.n_out ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_2384, - _t10.anything_n, + _t10.damp_modifier, NOT _t26.l_target IS NULL AND _t26.l_source = _t26.l_target AS dummy_link_2382, _t10.n_out, _s21.s_key @@ -126,14 +117,12 @@ WITH _t21 AS ( _t10.dummy_link_2372 ), _t6 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t8.anything_n - ) + 0.85 * SUM( + _t8.damp_modifier + 0.85 * SUM( CAST(( CAST(_t27.l_source <> _t27.l_target OR _t27.l_target IS NULL AS INTEGER) * _t8.page_rank_0_2384 ) AS REAL) / _t8.n_out ) OVER (PARTITION BY _s25.s_key) AS page_rank_0_2394, - _t8.anything_n, + _t8.damp_modifier, NOT _t27.l_target IS NULL AND _t27.l_source = _t27.l_target AS dummy_link_2392, _t8.n_out, _s25.s_key @@ -146,14 +135,12 @@ WITH _t21 AS 
( _t8.dummy_link_2382 ), _t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( + _t6.damp_modifier + 0.85 * SUM( CAST(( CAST(_t28.l_source <> _t28.l_target OR _t28.l_target IS NULL AS INTEGER) * _t6.page_rank_0_2394 ) AS REAL) / _t6.n_out ) OVER (PARTITION BY _s29.s_key) AS page_rank_0_2404, - _t6.anything_n, + _t6.damp_modifier, NOT _t28.l_target IS NULL AND _t28.l_source = _t28.l_target AS dummy_link_2402, _t6.n_out, _s29.s_key @@ -166,9 +153,7 @@ WITH _t21 AS ( _t6.dummy_link_2392 ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t29.l_source <> _t29.l_target OR _t29.l_target IS NULL AS INTEGER) * _t4.page_rank_0_2404 ) AS REAL) / _t4.n_out From 0508822866ff0b6c7495af1b5dd7de9297f8ce23 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 19:01:38 -0400 Subject: [PATCH 042/143] Initial revisions --- tests/conftest.py | 126 +------------------ tests/gen_data/gen_pagerank.py | 153 +++++++++++++++++++++++ tests/test_metadata/pagerank_graphs.json | 2 +- tests/testing_utilities.py | 4 +- 4 files changed, 162 insertions(+), 123 deletions(-) create mode 100644 tests/gen_data/gen_pagerank.py diff --git a/tests/conftest.py b/tests/conftest.py index 21b362b2d..99a34ae75 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,6 +24,7 @@ from pydough.qdag import AstNodeBuilder from tests.testing_utilities import graph_fetcher +from .gen_data.gen_pagerank import gen_pagerank_records, pagerank_configs from .gen_data.gen_technograph import gen_technograph_records @@ -456,90 +457,9 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: # Setup the directory to be the main PyDough directory. base_dir: str = os.path.dirname(os.path.dirname(__file__)) - # The configurations for the pagerank databases. Each tuple contains: - # - The name of the database. - # - The number of nodes n in the graph. 
- # - The edges in the graph as a list of tuples (src, dst), assuming the - # nodes are numbered from 1 to n. - pagerank_configs = [ - ("PAGERANK_A", 4, [(1, 2), (2, 1), (2, 3), (3, 4), (4, 1), (4, 2)]), - ("PAGERANK_B", 5, [(1, 2), (2, 1), (2, 5), (3, 2), (4, 2), (4, 5), (5, 3)]), - ( - "PAGERANK_C", - 8, - [ - (1, 2), - (1, 6), - (2, 1), - (2, 5), - (2, 6), - (3, 2), - (4, 2), - (4, 5), - (5, 3), - (7, 8), - (8, 7), - ], - ), - ( - "PAGERANK_D", - 16, - [ - (1, 2), - (1, 3), - (1, 4), - (1, 5), - (2, 1), - (2, 5), - (3, 2), - (4, 2), - (4, 5), - (4, 11), - (5, 3), - (5, 11), - (5, 14), - (5, 16), - (6, 7), - (7, 8), - (8, 6), - (8, 7), - (9, 2), - (9, 10), - (11, 12), - (12, 13), - (12, 14), - (13, 4), - (13, 5), - (15, 2), - ], - ), - ("PAGERANK_E", 5, [(i, j) for i in range(1, 6) for j in range(1, 6) if i != j]), - ("PAGERANK_F", 100, []), - ( - "PAGERANK_G", - 1000, - [ - (j + 1, i + 1) - for i in range(1000) - for j in range(i + 1, 1000) - if str(i) in str(j) - ], - ), - ( - "PAGERANK_H", - 50, - [ - (i, j) - for i in range(1, 51) - for j in range(1, 51) - if i != j and (i < j or i % j == 0) - ], - ), - ] - # Setup each of the the pagerank databases using the configurations. result: dict[str, DatabaseContext] = {} - for name, nodes, vertices in pagerank_configs: + for name, nodes, edges in pagerank_configs(): # Create the database and ensure it is empty. subprocess.run( f"cd tests; rm -fv gen_data/{name.lower()}.db; sqlite3 gen_data/{name.lower()}.db < gen_data/init_pagerank.sql", @@ -547,44 +467,10 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: ) path: str = os.path.join(base_dir, f"tests/gen_data/{name.lower()}.db") connection: sqlite3.Connection = sqlite3.connect(path) - cursor: sqlite3.Cursor = connection.cursor() - - # For every node, insert an entry into the SITES table. 
- for site in range(nodes): - cursor.execute( - "INSERT INTO SITES VALUES (?, ?)", - (site + 1, f"SITE {chr(ord('A') + site)}"), - ) - - # For every edge, insert an entry into the LINKS table. Keep track of - # the nodes that have no outgoing links. - no_outgoing: set[int] = set(range(1, nodes + 1)) - for src, dst in vertices: - no_outgoing.discard(src) - cursor.execute( - "INSERT INTO LINKS VALUES (?, ?)", - (src, dst), - ) - - # If there are no outgoing links for a site, insert a NULL link for it, - # indicating that the site links to ALL sites. - for site in no_outgoing: - cursor.execute( - "INSERT INTO LINKS VALUES (?, ?)", - (site, None), - ) - - # Insert a dummy self-link for every site. - for site in range(1, nodes + 1): - cursor.execute( - "INSERT INTO LINKS VALUES (?, ?)", - (site, site), - ) - - # Commit the changes, close the cursor, and store the context in the - # result dictionary. - cursor.connection.commit() - cursor.close() + + # Fill the tables of the database using the nodes/edges, then store the + # database context in the result. + gen_pagerank_records(connection, nodes, edges) result[name] = DatabaseContext( DatabaseConnection(connection), DatabaseDialect.SQLITE ) diff --git a/tests/gen_data/gen_pagerank.py b/tests/gen_data/gen_pagerank.py new file mode 100644 index 000000000..703ec5c32 --- /dev/null +++ b/tests/gen_data/gen_pagerank.py @@ -0,0 +1,153 @@ +""" +Logic used to generate the various sqlite databases used for PageRank tests. +""" + +import sqlite3 + + +def pagerank_configs() -> list[tuple[str, int, list[tuple[int, int]]]]: + """ + Returns a list of configurations for generating PageRank test data. + Each tuple contains: + - The name of the configuration (should be in the form "PAGERANK_X"). + - The number of vertices in the graph (numbered 1 to n) + - The list of tuples indicating edges in the graph in the form (src, dest). 
+ """ + configs: list[tuple[str, int, list[tuple[int, int]]]] = [] + configs.append(("PAGERANK_A", 4, [(1, 2), (2, 1), (2, 3), (3, 4), (4, 1), (4, 2)])) + configs.append( + ("PAGERANK_B", 5, [(1, 2), (2, 1), (2, 5), (3, 2), (4, 2), (4, 5), (5, 3)]) + ) + configs.append( + ( + "PAGERANK_C", + 8, + [ + (1, 2), + (1, 6), + (2, 1), + (2, 5), + (2, 6), + (3, 2), + (4, 2), + (4, 5), + (5, 3), + (7, 8), + (8, 7), + ], + ) + ) + configs.append( + ( + "PAGERANK_D", + 16, + [ + (1, 2), + (1, 3), + (1, 4), + (1, 5), + (2, 1), + (2, 5), + (3, 2), + (4, 2), + (4, 5), + (4, 11), + (5, 3), + (5, 11), + (5, 14), + (5, 16), + (6, 7), + (7, 8), + (8, 6), + (8, 7), + (9, 2), + (9, 10), + (11, 12), + (12, 13), + (12, 14), + (13, 4), + (13, 5), + (15, 2), + ], + ) + ) + configs.append( + ("PAGERANK_E", 5, [(i, j) for i in range(1, 6) for j in range(1, 6) if i != j]) + ) + configs.append(("PAGERANK_F", 100, [])) + configs.append( + ( + "PAGERANK_G", + 1000, + [ + (j + 1, i + 1) + for i in range(1000) + for j in range(i + 1, 1000) + if str(i) in str(j) + ], + ) + ) + configs.append( + ( + "PAGERANK_H", + 50, + [ + (i, j) + for i in range(1, 51) + for j in range(1, 51) + if i != j and (i < j or i % j == 0) + ], + ) + ) + return configs + + +def gen_pagerank_records( + connection: sqlite3.Connection, nodes: int, edges: list[tuple[int, int]] +) -> None: + """ + Fills a sqlite database with PageRank test data based on the provided + configuration. + + Args: + `connection`: The sqlite3 connection to the database. + `nodes`: The number of nodes in the graph. + `edges`: A list of tuples representing the edges in the graph. + """ + cursor: sqlite3.Cursor = connection.cursor() + + # For every node, insert an entry into the SITES table. + for site in range(nodes): + cursor.execute( + "INSERT INTO SITES VALUES (?, ?)", + (site + 1, f"SITE {hex(site)[2:]:0>4}"), + ) + + # For every edge, insert an entry into the LINKS table. Keep track of + # the nodes that have no outgoing links. 
+ no_outgoing: set[int] = set(range(1, nodes + 1)) + for src, dst in edges: + no_outgoing.discard(src) + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (src, dst), + ) + + # If there are no outgoing links for a site, insert a NULL link for it, + # indicating that the site links to ALL sites. + for site in no_outgoing: + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site, None), + ) + + # Insert a dummy self-link for every site. + for site in range(1, nodes + 1): + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site, site), + ) + + # Commit the changes & close the cursor + cursor.connection.commit() + cursor.close() diff --git a/tests/test_metadata/pagerank_graphs.json b/tests/test_metadata/pagerank_graphs.json index bd4150ceb..bbbe11275 100644 --- a/tests/test_metadata/pagerank_graphs.json +++ b/tests/test_metadata/pagerank_graphs.json @@ -61,4 +61,4 @@ } ] } -] \ No newline at end of file +] diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index d2c01690a..2b5750cb1 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1127,7 +1127,7 @@ def run_relational_test( """ # Skip if indicated. if self.skip_relational: - pytest.skip(f"Skipping relational plan test for {self.test_name!r}") + pytest.skip(f"Skipping relational plan test for {self.test_name}") # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) @@ -1185,7 +1185,7 @@ def run_sql_test( """ # Skip if indicated. 
if self.skip_sql: - pytest.skip(f"Skipping SQL text test for {self.test_name!r}") + pytest.skip(f"Skipping SQL text test for {self.test_name}") # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) From f59f45739d95f3f1054546ef4b61a9f849a7cb23 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 19:52:38 -0400 Subject: [PATCH 043/143] Added set up for simplification --- pydough/conversion/relational_converter.py | 2 + .../conversion/relational_simplification.py | 192 ++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 pydough/conversion/relational_simplification.py diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index f66debc38..8ae333b21 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -84,6 +84,7 @@ from .hybrid_tree import HybridTree from .merge_projects import merge_projects from .projection_pullup import pullup_projections +from .relational_simplification import simplify_expressions @dataclass @@ -1446,6 +1447,7 @@ def optimize_relational_tree( # Step 8: run projection pullup followed by column pruning 2x. for _ in range(2): root = confirm_root(pullup_projections(root)) + simplify_expressions(root) root = ColumnPruner().prune_unused_columns(root) # Step 9: re-run filter pushdown diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py new file mode 100644 index 000000000..aa88d0089 --- /dev/null +++ b/pydough/conversion/relational_simplification.py @@ -0,0 +1,192 @@ +""" +Logic used to simplify relational expressions in a relational node. 
+""" + +__all__ = ["simplify_expressions"] + + +from enum import Enum + +from pydough.relational import ( + Aggregate, + CallExpression, + ColumnReference, + EmptySingleton, + ExpressionSortInfo, + Filter, + Join, + Limit, + LiteralExpression, + Project, + RelationalExpression, + RelationalNode, + RelationalRoot, + Scan, + WindowCallExpression, +) +from pydough.relational.rel_util import ( + add_input_name, +) + + +class LogicalPredicate(Enum): + """ + Enum representing logical predicates that can be inferred about relational + expressions. + """ + + NOT_NULL = "NOT_NULL" + NOT_NEGATIVE = "NOT_NEGATIVE" + POSITIVE = "POSITIVE" + + +def run_simplification( + expr: RelationalExpression, + input_predicates: dict[RelationalExpression, set[LogicalPredicate]], +) -> tuple[RelationalExpression, set[LogicalPredicate]]: + """ + Runs the simplification on a single expression, applying any predicates + inferred from the input nodes to aid the process and inferring any new + predicates that apply to the resulting expression. + + Args: + `expr`: The expression to simplify. + `input_predicates`: A dictionary mapping input columns to the set of + predicates that are true for the column. + + Returns: + The simplified expression and a set of predicates that apply to the + resulting expression. 
+ """ + new_args: list[RelationalExpression] + new_partitions: list[RelationalExpression] + new_orders: list[ExpressionSortInfo] + arg_predicates: list[set[LogicalPredicate]] + output_predicates: set[LogicalPredicate] = set() + + if isinstance(expr, LiteralExpression): + if expr.value is not None: + output_predicates.add(LogicalPredicate.NOT_NULL) + if isinstance(expr.value, (int, float)): + if expr.value >= 0: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if expr.value > 0: + output_predicates.add(LogicalPredicate.POSITIVE) + + if isinstance(expr, ColumnReference): + output_predicates.update(input_predicates.get(expr, set())) + + if isinstance(expr, CallExpression): + new_args = [] + arg_predicates = [] + for arg in expr.inputs: + new_arg, new_preds = run_simplification(arg, input_predicates) + new_args.append(new_arg) + arg_predicates.append(new_preds) + expr = CallExpression(expr.op, expr.data_type, new_args) + + if isinstance(expr, WindowCallExpression): + new_args = [] + new_partitions = [] + new_orders = [] + arg_predicates = [] + for arg in expr.inputs: + new_arg, new_preds = run_simplification(arg, input_predicates) + new_args.append(new_arg) + arg_predicates.append(new_preds) + for partition in expr.partition_inputs: + new_partition, _ = run_simplification(partition, input_predicates) + new_partitions.append(new_partition) + for order in expr.order_inputs: + new_order, _ = run_simplification(order.expr, input_predicates) + new_orders.append( + ExpressionSortInfo(new_order, order.ascending, order.nulls_first) + ) + expr = WindowCallExpression( + expr.op, + expr.data_type, + new_args, + new_partitions, + new_orders, + expr.kwargs, + ) + return expr, output_predicates + + +def simplify_expressions( + node: RelationalNode, +) -> dict[RelationalExpression, set[LogicalPredicate]]: + """ + The main recursive procedure done to perform expression simplification on + a relational node and its descendants. 
The transformation is done in-place + + Args: + `node`: The relational node to perform simplification on. + + Returns: + The predicates inferred from the output columns of the node. + """ + # Recursively invoke the procedure on all inputs to the node. + input_predicates: dict[RelationalExpression, set[LogicalPredicate]] = {} + for idx, input_node in enumerate(node.inputs): + input_alias: str | None = node.default_input_aliases[idx] + predicates = simplify_expressions(input_node) + for expr, preds in predicates.items(): + input_predicates[add_input_name(expr, input_alias)] = preds + + # Transform the expressions of the current node in-place. + ref_expr: RelationalExpression + output_predicates: dict[RelationalExpression, set[LogicalPredicate]] = {} + match node: + case ( + Project() + | Filter() + | Join() + | Limit() + | RelationalRoot() + | Scan() + | EmptySingleton() + ): + for name, expr in node.columns.items(): + ref_expr = ColumnReference(name, expr.data_type) + node.columns[name], output_predicates[ref_expr] = run_simplification( + expr, input_predicates + ) + if isinstance(node, (Filter, Join)): + node._condition = run_simplification(node.condition, input_predicates)[ + 0 + ] + if isinstance(node, (RelationalRoot, Limit)): + node._orderings = [ + ExpressionSortInfo( + run_simplification(order_expr.expr, input_predicates)[0], + order_expr.ascending, + order_expr.nulls_first, + ) + for order_expr in node.orderings + ] + if isinstance(node, RelationalRoot): + node._ordered_columns = [ + (name, node.columns[name]) for name, _ in node.ordered_columns + ] + case Aggregate(): + for name, expr in node.keys.items(): + ref_expr = ColumnReference(name, expr.data_type) + node.keys[name], output_predicates[ref_expr] = run_simplification( + expr, input_predicates + ) + node.columns[name] = node.keys[name] + for name, expr in node.aggregations.items(): + ref_expr = ColumnReference(name, expr.data_type) + new_agg, output_predicates[ref_expr] = run_simplification( + expr, 
input_predicates + ) + assert isinstance(new_agg, CallExpression) + node.aggregations[name] = new_agg + node.columns[name] = node.aggregations[name] + + # For all other nodes, do not perform any simplification. + case _: + pass + + return output_predicates From b34f6ca22729fa06d18cc5961cb8da2a86a08344 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 20:17:25 -0400 Subject: [PATCH 044/143] Added first simplification rules --- .../conversion/relational_simplification.py | 164 +++++++++++++++--- .../agg_simplification_1.txt | 2 +- .../agg_simplification_2.txt | 2 +- tests/test_plan_refsols/aggregate_semi.txt | 2 +- tests/test_plan_refsols/common_prefix_ag.txt | 2 +- tests/test_plan_refsols/common_prefix_ah.txt | 2 +- tests/test_plan_refsols/common_prefix_ai.txt | 2 +- tests/test_plan_refsols/common_prefix_aj.txt | 2 +- tests/test_plan_refsols/common_prefix_ak.txt | 2 +- tests/test_plan_refsols/common_prefix_al.txt | 2 +- tests/test_plan_refsols/common_prefix_p.txt | 2 +- tests/test_plan_refsols/common_prefix_x.txt | 2 +- tests/test_plan_refsols/correl_18.txt | 2 +- tests/test_plan_refsols/correl_29.txt | 2 +- .../month_year_sliding_windows.txt | 4 +- .../multi_partition_access_5.txt | 15 +- .../multi_partition_access_6.txt | 4 +- tests/test_plan_refsols/simple_cross_11.txt | 2 +- .../sqlite_udf_count_epsilon.txt | 2 +- ...ograph_battery_failure_rates_anomalies.txt | 2 +- .../technograph_incident_rate_per_brand.txt | 2 +- .../technograph_most_unreliable_products.txt | 2 +- tests/test_plan_refsols/tpch_q20.txt | 2 +- .../agg_simplification_1_ansi.sql | 2 +- .../agg_simplification_1_sqlite.sql | 2 +- .../agg_simplification_2_ansi.sql | 6 +- .../agg_simplification_2_sqlite.sql | 6 +- .../sqlite_udf_count_epsilon_sqlite.sql | 2 +- ...h_battery_failure_rates_anomalies_ansi.sql | 4 +- ...battery_failure_rates_anomalies_sqlite.sql | 4 +- ...chnograph_incident_rate_per_brand_ansi.sql | 2 +- ...nograph_incident_rate_per_brand_sqlite.sql | 2 +- 
...hnograph_most_unreliable_products_ansi.sql | 4 +- ...ograph_most_unreliable_products_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q20_ansi.sql | 2 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 2 +- 36 files changed, 187 insertions(+), 80 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index aa88d0089..58414afcb 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -7,6 +7,7 @@ from enum import Enum +import pydough.pydough_operators as pydop from pydough.relational import ( Aggregate, CallExpression, @@ -15,6 +16,7 @@ ExpressionSortInfo, Filter, Join, + JoinType, Limit, LiteralExpression, Project, @@ -40,9 +42,91 @@ class LogicalPredicate(Enum): POSITIVE = "POSITIVE" +def simplify_function_call( + expr: CallExpression, + arg_predicates: list[set[LogicalPredicate]], + no_group_aggregate: bool, +) -> tuple[RelationalExpression, set[LogicalPredicate]]: + """ + TODO + """ + output_expr: RelationalExpression = expr + output_predicates: set[LogicalPredicate] = set() + match expr.op: + case pydop.COUNT | pydop.NDISTINCT: + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if ( + len(expr.inputs) == 1 + and LogicalPredicate.NOT_NULL in arg_predicates[0] + and no_group_aggregate + ): + output_predicates.add(LogicalPredicate.POSITIVE) + case ( + pydop.SUM + | pydop.AVG + | pydop.MIN + | pydop.MAX + | pydop.ANYTHING + | pydop.MEDIAN + | pydop.QUANTILE + ): + for predicate in [ + LogicalPredicate.NOT_NULL, + LogicalPredicate.NOT_NEGATIVE, + LogicalPredicate.POSITIVE, + ]: + if predicate in arg_predicates[0]: + output_predicates.add(predicate) + case pydop.DEFAULT_TO: + if LogicalPredicate.NOT_NULL in arg_predicates[0]: + output_expr = expr.inputs[0] + output_predicates = arg_predicates[0] + else: + if any(LogicalPredicate.NOT_NULL in preds for preds in arg_predicates): + 
output_predicates.add(LogicalPredicate.NOT_NULL) + for pred in arg_predicates[0]: + if all(pred in preds for preds in arg_predicates): + output_predicates.add(pred) + return output_expr, output_predicates + + +def simplify_window_call( + expr: WindowCallExpression, + arg_predicates: list[set[LogicalPredicate]], +) -> tuple[RelationalExpression, set[LogicalPredicate]]: + """ + TODO + """ + output_predicates: set[LogicalPredicate] = set() + return expr, output_predicates + + +def infer_literal_predicates(expr: LiteralExpression) -> set[LogicalPredicate]: + """ + Infers logical predicates from a literal expression. + + Args: + `expr`: The literal expression to infer predicates from. + + Returns: + A set of logical predicates inferred from the literal. + """ + output_predicates: set[LogicalPredicate] = set() + if expr.value is not None: + output_predicates.add(LogicalPredicate.NOT_NULL) + if isinstance(expr.value, (int, float)): + if expr.value >= 0: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if expr.value > 0: + output_predicates.add(LogicalPredicate.POSITIVE) + return output_predicates + + def run_simplification( expr: RelationalExpression, input_predicates: dict[RelationalExpression, set[LogicalPredicate]], + no_group_aggregate: bool, ) -> tuple[RelationalExpression, set[LogicalPredicate]]: """ Runs the simplification on a single expression, applying any predicates @@ -53,6 +137,9 @@ def run_simplification( `expr`: The expression to simplify. `input_predicates`: A dictionary mapping input columns to the set of predicates that are true for the column. + `no_group_aggregate`: A boolean indicating whether the expression is + part of an aggregate operation w/o keys, which affects how predicates + are inferred. 
Returns: The simplified expression and a set of predicates that apply to the @@ -63,27 +150,29 @@ def run_simplification( new_orders: list[ExpressionSortInfo] arg_predicates: list[set[LogicalPredicate]] output_predicates: set[LogicalPredicate] = set() + requires_rewrite: bool = False if isinstance(expr, LiteralExpression): - if expr.value is not None: - output_predicates.add(LogicalPredicate.NOT_NULL) - if isinstance(expr.value, (int, float)): - if expr.value >= 0: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - if expr.value > 0: - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates = infer_literal_predicates(expr) if isinstance(expr, ColumnReference): - output_predicates.update(input_predicates.get(expr, set())) + output_predicates = input_predicates.get(expr, set()) if isinstance(expr, CallExpression): new_args = [] arg_predicates = [] for arg in expr.inputs: - new_arg, new_preds = run_simplification(arg, input_predicates) + new_arg, new_preds = run_simplification( + arg, input_predicates, no_group_aggregate + ) + requires_rewrite |= new_arg is not arg new_args.append(new_arg) arg_predicates.append(new_preds) - expr = CallExpression(expr.op, expr.data_type, new_args) + if requires_rewrite: + expr = CallExpression(expr.op, expr.data_type, new_args) + expr, output_predicates = simplify_function_call( + expr, arg_predicates, no_group_aggregate + ) if isinstance(expr, WindowCallExpression): new_args = [] @@ -91,25 +180,37 @@ def run_simplification( new_orders = [] arg_predicates = [] for arg in expr.inputs: - new_arg, new_preds = run_simplification(arg, input_predicates) + new_arg, new_preds = run_simplification( + arg, input_predicates, no_group_aggregate + ) + requires_rewrite |= new_arg is not arg new_args.append(new_arg) arg_predicates.append(new_preds) for partition in expr.partition_inputs: - new_partition, _ = run_simplification(partition, input_predicates) + new_partition, _ = run_simplification( + partition, input_predicates, 
no_group_aggregate + ) + requires_rewrite |= new_partition is not partition new_partitions.append(new_partition) for order in expr.order_inputs: - new_order, _ = run_simplification(order.expr, input_predicates) + new_order, _ = run_simplification( + order.expr, input_predicates, no_group_aggregate + ) + requires_rewrite |= new_order is not order.expr new_orders.append( ExpressionSortInfo(new_order, order.ascending, order.nulls_first) ) - expr = WindowCallExpression( - expr.op, - expr.data_type, - new_args, - new_partitions, - new_orders, - expr.kwargs, - ) + if requires_rewrite: + expr = WindowCallExpression( + expr.op, + expr.data_type, + new_args, + new_partitions, + new_orders, + expr.kwargs, + ) + expr, output_predicates = simplify_window_call(expr, arg_predicates) + return expr, output_predicates @@ -150,16 +251,16 @@ def simplify_expressions( for name, expr in node.columns.items(): ref_expr = ColumnReference(name, expr.data_type) node.columns[name], output_predicates[ref_expr] = run_simplification( - expr, input_predicates + expr, input_predicates, False ) if isinstance(node, (Filter, Join)): - node._condition = run_simplification(node.condition, input_predicates)[ - 0 - ] + node._condition = run_simplification( + node.condition, input_predicates, False + )[0] if isinstance(node, (RelationalRoot, Limit)): node._orderings = [ ExpressionSortInfo( - run_simplification(order_expr.expr, input_predicates)[0], + run_simplification(order_expr.expr, input_predicates, False)[0], order_expr.ascending, order_expr.nulls_first, ) @@ -169,17 +270,24 @@ def simplify_expressions( node._ordered_columns = [ (name, node.columns[name]) for name, _ in node.ordered_columns ] + if isinstance(node, Join) and node.join_type != JoinType.INNER: + for expr, preds in output_predicates.items(): + if ( + isinstance(expr, ColumnReference) + and expr.input_name != node.default_input_aliases[0] + ): + preds.discard(LogicalPredicate.NOT_NULL) case Aggregate(): for name, expr in 
node.keys.items(): ref_expr = ColumnReference(name, expr.data_type) node.keys[name], output_predicates[ref_expr] = run_simplification( - expr, input_predicates + expr, input_predicates, False ) node.columns[name] = node.keys[name] for name, expr in node.aggregations.items(): ref_expr = ColumnReference(name, expr.data_type) new_agg, output_predicates[ref_expr] = run_simplification( - expr, input_predicates + expr, input_predicates, len(node.keys) == 0 ) assert isinstance(new_agg, CallExpression) node.aggregations[name] = new_agg diff --git a/tests/test_plan_refsols/agg_simplification_1.txt b/tests/test_plan_refsols/agg_simplification_1.txt index 014c1dbdf..a17af4c0d 100644 --- a/tests/test_plan_refsols/agg_simplification_1.txt +++ b/tests/test_plan_refsols/agg_simplification_1.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('aug_exchange', aug_exchange), ('su1', DEFAULT_TO(count_one, 0:numeric)), ('su2', DEFAULT_TO(count_one * 2:numeric, 0:numeric)), ('su3', DEFAULT_TO(count_one * -1:numeric, 0:numeric)), ('su4', DEFAULT_TO(count_one * -3:numeric, 0:numeric)), ('su5', DEFAULT_TO(0:numeric, 0:numeric)), ('su6', DEFAULT_TO(count_one * 0.5:numeric, 0:numeric)), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 
2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) +ROOT(columns=[('aug_exchange', aug_exchange), ('su1', count_one), ('su2', DEFAULT_TO(count_one * 2:numeric, 0:numeric)), ('su3', DEFAULT_TO(count_one * -1:numeric, 0:numeric)), ('su4', DEFAULT_TO(count_one * -3:numeric, 0:numeric)), ('su5', 0:numeric), ('su6', DEFAULT_TO(count_one * 0.5:numeric, 0:numeric)), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 
2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) AGGREGATE(keys={'aug_exchange': aug_exchange}, aggregations={'agg_63': QUANTILE(aug_exchange, 0.8:numeric), 'count_one': COUNT()}) PROJECT(columns={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string))}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_plan_refsols/agg_simplification_2.txt b/tests/test_plan_refsols/agg_simplification_2.txt index c1121462b..9a985234e 100644 --- a/tests/test_plan_refsols/agg_simplification_2.txt +++ b/tests/test_plan_refsols/agg_simplification_2.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('state', sbCustState), ('a1', n_rows), ('a2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('a3', DEFAULT_TO(sum_nj, 0:numeric)), ('a4', DEFAULT_TO(DEFAULT_TO(sum_sz, 0:numeric), 0:numeric)), ('a5', min_min_sbCustPhone), ('a6', max_max_sbCustPhone), ('a7', min_anys), ('a8', min_anys), ('a9', min_anys)], orderings=[(sbCustState):asc_first]) +ROOT(columns=[('state', sbCustState), ('a1', n_rows), ('a2', sum_n_rows), ('a3', sum_nj), ('a4', DEFAULT_TO(sum_sz, 0:numeric)), ('a5', min_min_sbCustPhone), ('a6', max_max_sbCustPhone), ('a7', min_anys), ('a8', min_anys), ('a9', min_anys)], orderings=[(sbCustState):asc_first]) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'max_max_sbCustPhone': MAX(sbCustPhone), 'min_anys': ANYTHING(LOWER(sbCustState)), 'min_min_sbCustPhone': MIN(sbCustPhone), 'n_rows': NDISTINCT(sbCustCity), 'sum_n_rows': COUNT(), 'sum_nj': 
COUNT(KEEP_IF(sbCustName, STARTSWITH(LOWER(sbCustName), 'j':string))), 'sum_sz': SUM(INTEGER(sbCustPostalCode))}) SCAN(table=main.sbCustomer, columns={'sbCustCity': sbCustCity, 'sbCustName': sbCustName, 'sbCustPhone': sbCustPhone, 'sbCustPostalCode': sbCustPostalCode, 'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 7e7f2a981..8e8ffd889 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(n_rows, 0:numeric)), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) +ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice_1': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index e57b541ed..0f4659d66 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 
2:numeric))], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index a2d4305ca..f84051d87 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], 
orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index f28e37a55..19cf86f66 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index 4df2fe150..c7443fe73 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 036dab140..f580316b2 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), 
('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index d5af6cd67..05d4bb7bd 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_custkey):asc_first]) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': 
t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index f3d26328c..47e4f9c62 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)):asc_first, (c_name):asc_first], limit=5:numeric) +ROOT(columns=[('name', c_name), ('n_orders', n_rows), ('n_parts_ordered', n_rows_1), ('n_distinct_parts', ndistinct_l_partkey)], orderings=[(ndistinct_l_partkey / n_rows_1):asc_first, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index 9de4be686..c34cd7ff5 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) +ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 
'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 646b9855d..e3dbcd52b 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('n', DEFAULT_TO(sum_n_above_avg, 0:numeric))], orderings=[]) +ROOT(columns=[('n', sum_n_above_avg)], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_n_above_avg': COUNT()}) FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={}) JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index f3b0080b7..2beba08e0 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', DEFAULT_TO(n_rows, 0:numeric)), ('n_above_avg_suppliers', DEFAULT_TO(n_rows_1, 0:numeric)), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) +ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_rows_1), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': 
t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 41ba7be61..25f33c177 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,8 +1,8 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) - FILTER(condition=DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric) > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': 
sum_month_total_spent, 'year': year}) + FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) + PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 6c6ea66d7..9fad186da 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -2,18 +2,17 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) PROJECT(columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 
'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.sum_n_ticker_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - 
SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_type_trans / sum_n_ticker_type_trans > 0.8:numeric, columns={'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 69f9b27a4..3576df074 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -2,7 +2,7 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=n_ticker_type_trans == 1:numeric | n_cust_type_trans == 1:numeric, columns={'sbTxId': sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t0.n_cust_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - 
FILTER(condition=DEFAULT_TO(sum_n_cust_type_trans, 0:numeric) > 1:numeric, columns={'sbTxCustId': sbTxCustId}) + FILTER(condition=sum_n_cust_type_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) @@ -43,7 +43,7 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) + FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) diff --git a/tests/test_plan_refsols/simple_cross_11.txt b/tests/test_plan_refsols/simple_cross_11.txt index a307e9426..ebbe3bf16 100644 --- a/tests/test_plan_refsols/simple_cross_11.txt +++ b/tests/test_plan_refsols/simple_cross_11.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('n', DEFAULT_TO(n, 0:numeric))], orderings=[]) +ROOT(columns=[('n', n)], orderings=[]) JOIN(condition=t0.o_orderdate == 
t1.min_date, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n': t0.n}) AGGREGATE(keys={'o_orderdate': o_orderdate}, aggregations={'n': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt index 5ed26b322..380033384 100644 --- a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', r_name), ('n_cust', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) +ROOT(columns=[('name', r_name), ('n_cust', n_rows)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index 050999da6..639a18e28 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) +ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) 
AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index a64d4f50c..954d1b5a9 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) +ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index d27e1aa5b..53528b9d0 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 
2:numeric))], orderings=[(ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) +ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 245e3ef0e..0ce79583c 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -6,7 +6,7 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(DEFAULT_TO(sum_l_quantity, 0:numeric), 0:numeric), columns={'ps_suppkey': ps_suppkey}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric), columns={'ps_suppkey': ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) 
JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) diff --git a/tests/test_sql_refsols/agg_simplification_1_ansi.sql b/tests/test_sql_refsols/agg_simplification_1_ansi.sql index 6c346edc0..8f1e739b8 100644 --- a/tests/test_sql_refsols/agg_simplification_1_ansi.sql +++ b/tests/test_sql_refsols/agg_simplification_1_ansi.sql @@ -1,6 +1,6 @@ SELECT LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, - COALESCE(COUNT(*), 0) AS su1, + COUNT(*) AS su1, COALESCE(COUNT(*) * 2, 0) AS su2, COALESCE(COUNT(*) * -1, 0) AS su3, COALESCE(COUNT(*) * -3, 0) AS su4, diff --git a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql index 4716a0c4c..ab162413c 100644 --- a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql +++ b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql @@ -153,7 +153,7 @@ WITH _t1 AS ( ) SELECT LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, - COALESCE(COUNT(*), 0) AS su1, + COUNT(*) AS su1, COALESCE(COUNT(*) * 2, 0) AS su2, COALESCE(COUNT(*) * -1, 0) AS su3, COALESCE(COUNT(*) * -3, 0) AS su4, diff --git a/tests/test_sql_refsols/agg_simplification_2_ansi.sql b/tests/test_sql_refsols/agg_simplification_2_ansi.sql index d36f0482c..fef0f4709 100644 --- a/tests/test_sql_refsols/agg_simplification_2_ansi.sql +++ b/tests/test_sql_refsols/agg_simplification_2_ansi.sql @@ -1,9 +1,9 @@ SELECT sbcuststate AS state, COUNT(DISTINCT sbcustcity) AS a1, - COALESCE(COUNT(*), 0) AS a2, - COALESCE(COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END), 0) AS a3, - COALESCE(COALESCE(SUM(CAST(sbcustpostalcode AS BIGINT)), 0), 0) AS a4, + COUNT(*) AS a2, + COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END) AS a3, + COALESCE(SUM(CAST(sbcustpostalcode AS 
BIGINT)), 0) AS a4, MIN(sbcustphone) AS a5, MAX(sbcustphone) AS a6, ANY_VALUE(LOWER(sbcuststate)) AS a7, diff --git a/tests/test_sql_refsols/agg_simplification_2_sqlite.sql b/tests/test_sql_refsols/agg_simplification_2_sqlite.sql index deee0c7a6..22bc341e6 100644 --- a/tests/test_sql_refsols/agg_simplification_2_sqlite.sql +++ b/tests/test_sql_refsols/agg_simplification_2_sqlite.sql @@ -1,9 +1,9 @@ SELECT sbcuststate AS state, COUNT(DISTINCT sbcustcity) AS a1, - COALESCE(COUNT(*), 0) AS a2, - COALESCE(COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END), 0) AS a3, - COALESCE(COALESCE(SUM(CAST(sbcustpostalcode AS INTEGER)), 0), 0) AS a4, + COUNT(*) AS a2, + COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END) AS a3, + COALESCE(SUM(CAST(sbcustpostalcode AS INTEGER)), 0) AS a4, MIN(sbcustphone) AS a5, MAX(sbcustphone) AS a6, MAX(LOWER(sbcuststate)) AS a7, diff --git a/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql index 28cbbc248..8d39a6781 100644 --- a/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql @@ -18,7 +18,7 @@ WITH _t2 AS ( ) SELECT region.r_name AS name, - COALESCE(_s3.n_rows, 0) AS n_cust + _s3.n_rows AS n_cust FROM tpch.region AS region JOIN _s3 AS _s3 ON _s3.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql index e665da566..1af11b549 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql @@ -11,7 +11,7 @@ WITH _s7 AS ( SELECT countries.co_name AS country_name, products.pr_name AS product_name, - ROUND(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) / COUNT(*), 2) AS ir + ROUND(COALESCE(SUM(_s7.n_rows), 0) / 
COUNT(*), 2) AS ir FROM main.countries AS countries JOIN main.devices AS devices ON countries.co_id = devices.de_production_country_id @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ROUND(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) / COUNT(*), 2) DESC, + ROUND(COALESCE(SUM(_s7.n_rows), 0) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql index 8c6123891..e7749a8a2 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql @@ -11,7 +11,7 @@ WITH _s7 AS ( SELECT countries.co_name AS country_name, products.pr_name AS product_name, - ROUND(CAST(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) AS REAL) / COUNT(*), 2) AS ir + ROUND(CAST(COALESCE(SUM(_s7.n_rows), 0) AS REAL) / COUNT(*), 2) AS ir FROM main.countries AS countries JOIN main.devices AS devices ON countries.co_id = devices.de_production_country_id @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ROUND(CAST(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) AS REAL) / COUNT(*), 2) DESC, + ROUND(CAST(COALESCE(SUM(_s7.n_rows), 0) AS REAL) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql index ad09ee111..595401f9d 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ) SELECT products.pr_brand AS brand, - ROUND(COALESCE(COALESCE(SUM(_s3.n_rows), 0), 0) / COUNT(*), 2) AS ir + ROUND(COALESCE(SUM(_s3.n_rows), 0) / COUNT(*), 2) AS ir FROM main.devices AS devices JOIN main.products AS products ON 
devices.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql index 864caeee4..6b499d7d4 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ) SELECT products.pr_brand AS brand, - ROUND(CAST(COALESCE(COALESCE(SUM(_s3.n_rows), 0), 0) AS REAL) / COUNT(*), 2) AS ir + ROUND(CAST(COALESCE(SUM(_s3.n_rows), 0) AS REAL) / COUNT(*), 2) AS ir FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql index 572bff6d9..56095322c 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql @@ -22,10 +22,10 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, products.pr_type AS product_type, - ROUND(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) / _s5.n_rows, 2) AS ir + ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - ROUND(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) / _s5.n_rows, 2) DESC + ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql index 4f0d45267..41de9fe64 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql @@ -22,10 +22,10 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, 
products.pr_type AS product_type, - ROUND(CAST(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) AS REAL) / _s5.n_rows, 2) AS ir + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - ROUND(CAST(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) AS REAL) / _s5.n_rows, 2) DESC + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 67f82c489..0c77ea8b5 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) + 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) GROUP BY partsupp.ps_suppkey diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index c7f3110d7..4ffe16370 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) + 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) GROUP BY partsupp.ps_suppkey From 7884341155d3ece95a5d08c82d1a78f1bfa72ce5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 20:20:32 -0400 Subject: [PATCH 045/143] Improved null handling for aggregations --- .../conversion/relational_simplification.py | 8 +++-- .../multi_partition_access_5.txt | 33 +++++++++---------- .../multi_partition_access_6.txt | 2 +- tests/test_plan_refsols/simple_cross_3.txt | 2 +- tests/test_plan_refsols/triple_partition.txt | 2 +- 5 files changed, 25 insertions(+), 22 deletions(-) diff --git a/pydough/conversion/relational_simplification.py 
b/pydough/conversion/relational_simplification.py index 58414afcb..510d3675e 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -59,7 +59,7 @@ def simplify_function_call( if ( len(expr.inputs) == 1 and LogicalPredicate.NOT_NULL in arg_predicates[0] - and no_group_aggregate + and not no_group_aggregate ): output_predicates.add(LogicalPredicate.POSITIVE) case ( @@ -72,12 +72,16 @@ def simplify_function_call( | pydop.QUANTILE ): for predicate in [ - LogicalPredicate.NOT_NULL, LogicalPredicate.NOT_NEGATIVE, LogicalPredicate.POSITIVE, ]: if predicate in arg_predicates[0]: output_predicates.add(predicate) + if ( + LogicalPredicate.NOT_NULL in arg_predicates[0] + and not no_group_aggregate + ): + output_predicates.add(LogicalPredicate.NOT_NULL) case pydop.DEFAULT_TO: if LogicalPredicate.NOT_NULL in arg_predicates[0]: output_expr = expr.inputs[0] diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 9fad186da..0c443c6e8 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,18 +1,17 @@ -ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', n_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) - FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': n_ticker_trans, 
'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.sum_n_ticker_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / sum_n_ticker_type_trans > 0.8:numeric, columns={'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) 
+ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', sum_n_ticker_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + FILTER(condition=n_ticker_type_trans / sum_n_ticker_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.sum_n_ticker_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_type_trans / sum_n_ticker_type_trans > 0.8:numeric, columns={'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans}) + 
JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 3576df074..8dedc5205 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -35,7 +35,7 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxType': sbTxType}) + FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) 
JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) diff --git a/tests/test_plan_refsols/simple_cross_3.txt b/tests/test_plan_refsols/simple_cross_3.txt index 9c08e86d4..9ce8291db 100644 --- a/tests/test_plan_refsols/simple_cross_3.txt +++ b/tests/test_plan_refsols/simple_cross_3.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('supplier_nation', anything_anything_supplier_nation), ('customer_nation', anything_anything_n_name), ('nation_combinations', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) +ROOT(columns=[('supplier_nation', anything_anything_supplier_nation), ('customer_nation', anything_anything_n_name), ('nation_combinations', sum_n_rows)], orderings=[]) AGGREGATE(keys={'key_5': key_5, 'key_8': key_8, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_supplier_nation': ANYTHING(anything_supplier_nation), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_supplier_nation': t0.anything_supplier_nation, 'key_5': t0.key_5, 'key_8': t0.key_8, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'r_regionkey': t0.r_regionkey}) AGGREGATE(keys={'key_5': key_5, 'key_8': key_8, 'l_suppkey': l_suppkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_supplier_nation': ANYTHING(supplier_nation), 'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/triple_partition.txt b/tests/test_plan_refsols/triple_partition.txt index a5ed33abe..7b3963130 100644 --- a/tests/test_plan_refsols/triple_partition.txt +++ b/tests/test_plan_refsols/triple_partition.txt @@ -1,5 +1,5 @@ 
ROOT(columns=[('region', supp_region), ('avgpct', avg_percentage)], orderings=[(supp_region):asc_first]) - AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(100.0:numeric * max_n_instances / DEFAULT_TO(sum_n_instances, 0:numeric))}) + AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(100.0:numeric * max_n_instances / sum_n_instances)}) AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': COUNT()}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) From b281c8a621a2a6ee037d764398ea26362b4aba67 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 20:37:52 -0400 Subject: [PATCH 046/143] Added more simplification rules --- .../conversion/relational_simplification.py | 86 +++++++++++++++++++ .../agg_simplification_1.txt | 2 +- .../agg_simplification_1_ansi.sql | 8 +- .../agg_simplification_1_sqlite.sql | 8 +- 4 files changed, 95 insertions(+), 9 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 510d3675e..45698b619 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -42,6 +42,55 @@ class LogicalPredicate(Enum): POSITIVE = "POSITIVE" +NULL_PROPAGATING_OPS: set[pydop.PyDoughOperator] = { + pydop.ADD, + pydop.SUB, + pydop.MUL, + pydop.BAN, + pydop.BOR, + pydop.NOT, + pydop.LOWER, + pydop.UPPER, + pydop.LENGTH, + pydop.STRIP, + pydop.REPLACE, + pydop.FIND, + pydop.ABS, + pydop.CEIL, + pydop.FLOOR, + pydop.ROUND, + pydop.EQU, + pydop.NEQ, + pydop.GEQ, + pydop.GRT, + pydop.LET, + pydop.LEQ, + pydop.BXR, + pydop.STARTSWITH, + pydop.ENDSWITH, + 
pydop.CONTAINS, + pydop.LIKE, + pydop.SIGN, + pydop.SMALLEST, + pydop.LARGEST, + pydop.IFF, + pydop.YEAR, + pydop.MONTH, + pydop.DAY, + pydop.HOUR, + pydop.MINUTE, + pydop.SECOND, + pydop.DATEDIFF, + pydop.DAYNAME, + pydop.DAYOFWEEK, + pydop.SLICE, + pydop.LPAD, + pydop.RPAD, + pydop.MONOTONIC, + pydop.JOIN_STRINGS, +} + + def simplify_function_call( expr: CallExpression, arg_predicates: list[set[LogicalPredicate]], @@ -52,6 +101,9 @@ def simplify_function_call( """ output_expr: RelationalExpression = expr output_predicates: set[LogicalPredicate] = set() + if expr.op in NULL_PROPAGATING_OPS: + if all(LogicalPredicate.NOT_NULL in preds for preds in arg_predicates): + output_predicates.add(LogicalPredicate.NOT_NULL) match expr.op: case pydop.COUNT | pydop.NDISTINCT: output_predicates.add(LogicalPredicate.NOT_NULL) @@ -92,6 +144,40 @@ def simplify_function_call( for pred in arg_predicates[0]: if all(pred in preds for preds in arg_predicates): output_predicates.add(pred) + case pydop.ABS: + if ( + LogicalPredicate.POSITIVE in arg_predicates[0] + or LogicalPredicate.NOT_NEGATIVE in arg_predicates[0] + ): + output_expr = expr.inputs[0] + output_predicates = arg_predicates[0] + else: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case ( + pydop.LENGTH + | pydop.BAN + | pydop.BOR + | pydop.BXR + | pydop.EQU + | pydop.NEQ + | pydop.GEQ + | pydop.GRT + | pydop.LET + | pydop.LEQ + | pydop.STARTSWITH + | pydop.ENDSWITH + | pydop.CONTAINS + | pydop.LIKE + | pydop.SQRT + ): + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.PRESENT: + if LogicalPredicate.NOT_NULL in arg_predicates[0]: + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.ABSENT: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) return output_expr, output_predicates diff --git a/tests/test_plan_refsols/agg_simplification_1.txt 
b/tests/test_plan_refsols/agg_simplification_1.txt index a17af4c0d..feb52edbe 100644 --- a/tests/test_plan_refsols/agg_simplification_1.txt +++ b/tests/test_plan_refsols/agg_simplification_1.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('aug_exchange', aug_exchange), ('su1', count_one), ('su2', DEFAULT_TO(count_one * 2:numeric, 0:numeric)), ('su3', DEFAULT_TO(count_one * -1:numeric, 0:numeric)), ('su4', DEFAULT_TO(count_one * -3:numeric, 0:numeric)), ('su5', 0:numeric), ('su6', DEFAULT_TO(count_one * 0.5:numeric, 0:numeric)), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', 
agg_63)], orderings=[(aug_exchange):asc_first]) +ROOT(columns=[('aug_exchange', aug_exchange), ('su1', count_one), ('su2', count_one * 2:numeric), ('su3', count_one * -1:numeric), ('su4', count_one * -3:numeric), ('su5', 0:numeric), ('su6', count_one * 0.5:numeric), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) AGGREGATE(keys={'aug_exchange': aug_exchange}, aggregations={'agg_63': QUANTILE(aug_exchange, 0.8:numeric), 'count_one': COUNT()}) PROJECT(columns={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange 
!= 'NYSE Arca':string))}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_sql_refsols/agg_simplification_1_ansi.sql b/tests/test_sql_refsols/agg_simplification_1_ansi.sql index 8f1e739b8..1807f6d2e 100644 --- a/tests/test_sql_refsols/agg_simplification_1_ansi.sql +++ b/tests/test_sql_refsols/agg_simplification_1_ansi.sql @@ -1,11 +1,11 @@ SELECT LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, COUNT(*) AS su1, - COALESCE(COUNT(*) * 2, 0) AS su2, - COALESCE(COUNT(*) * -1, 0) AS su3, - COALESCE(COUNT(*) * -3, 0) AS su4, + COUNT(*) * 2 AS su2, + COUNT(*) * -1 AS su3, + COUNT(*) * -3 AS su4, 0 AS su5, - COALESCE(COUNT(*) * 0.5, 0) AS su6, + COUNT(*) * 0.5 AS su6, COALESCE(NULL, 0) AS su7, COALESCE( LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), diff --git a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql index ab162413c..515f11664 100644 --- a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql +++ b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql @@ -154,11 +154,11 @@ WITH _t1 AS ( SELECT LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, COUNT(*) AS su1, - COALESCE(COUNT(*) * 2, 0) AS su2, - COALESCE(COUNT(*) * -1, 0) AS su3, - COALESCE(COUNT(*) * -3, 0) AS su4, + COUNT(*) * 2 AS su2, + COUNT(*) * -1 AS su3, + COUNT(*) * -3 AS su4, 0 AS su5, - COALESCE(COUNT(*) * 0.5, 0) AS su6, + COUNT(*) * 0.5 AS su6, COALESCE(NULL, 0) AS su7, COALESCE( LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), From 78f6e78f63e2b8cc705514e030c453ce89c2779f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 21:12:07 -0400 Subject: [PATCH 047/143] More >0 filter improvements --- .../conversion/relational_simplification.py | 62 ++++++++++++++++--- 
tests/test_plan_refsols/common_prefix_al.txt | 17 +++-- ...ch_overlapping_event_searches_per_user.txt | 23 ++++--- tests/test_plan_refsols/tpch_q20.txt | 21 +++---- ...erlapping_event_searches_per_user_ansi.sql | 9 +-- ...lapping_event_searches_per_user_sqlite.sql | 9 +-- tests/test_sql_refsols/tpch_q20_ansi.sql | 11 ++-- tests/test_sql_refsols/tpch_q20_sqlite.sql | 11 ++-- 8 files changed, 95 insertions(+), 68 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 45698b619..cceec7bf4 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -108,10 +108,8 @@ def simplify_function_call( case pydop.COUNT | pydop.NDISTINCT: output_predicates.add(LogicalPredicate.NOT_NULL) output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - if ( - len(expr.inputs) == 1 - and LogicalPredicate.NOT_NULL in arg_predicates[0] - and not no_group_aggregate + if not no_group_aggregate and ( + len(expr.inputs) == 0 or LogicalPredicate.NOT_NULL in arg_predicates[0] ): output_predicates.add(LogicalPredicate.POSITIVE) case ( @@ -158,19 +156,36 @@ def simplify_function_call( | pydop.BAN | pydop.BOR | pydop.BXR - | pydop.EQU - | pydop.NEQ - | pydop.GEQ - | pydop.GRT - | pydop.LET - | pydop.LEQ | pydop.STARTSWITH | pydop.ENDSWITH | pydop.CONTAINS | pydop.LIKE | pydop.SQRT + | pydop.MONOTONIC ): output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: + match (expr.op, expr.inputs[1]): + case (pydop.GRT, LiteralExpression()) if ( + expr.inputs[1].value == 0 + and LogicalPredicate.POSITIVE in arg_predicates[0] + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.add(LogicalPredicate.POSITIVE) + case (pydop.GEQ, LiteralExpression()) if ( + expr.inputs[1].value 
== 0 + and LogicalPredicate.NOT_NEGATIVE in arg_predicates[0] + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.add(LogicalPredicate.POSITIVE) + case _: + pass + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.PRESENT: if LogicalPredicate.NOT_NULL in arg_predicates[0]: output_expr = LiteralExpression(True, expr.data_type) @@ -189,6 +204,32 @@ def simplify_window_call( TODO """ output_predicates: set[LogicalPredicate] = set() + no_frame: bool = not ( + expr.kwargs.get("cumulative", False) or "frame" in expr.kwargs + ) + match expr.op: + case pydop.RANKING | pydop.PERCENTILE: + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.add(LogicalPredicate.POSITIVE) + case pydop.RELSUM | pydop.RELAVG: + if LogicalPredicate.NOT_NULL in arg_predicates[0] and no_frame: + output_predicates.add(LogicalPredicate.NOT_NULL) + if LogicalPredicate.NOT_NEGATIVE in arg_predicates[0]: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if LogicalPredicate.POSITIVE in arg_predicates[0] and no_frame: + output_predicates.add(LogicalPredicate.POSITIVE) + case pydop.RELSIZE: + if no_frame: + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.RELCOUNT: + if no_frame: + output_predicates.add(LogicalPredicate.NOT_NULL) + if LogicalPredicate.NOT_NULL in arg_predicates[0]: + output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) return expr, output_predicates @@ -367,6 +408,7 @@ def simplify_expressions( and expr.input_name != node.default_input_aliases[0] ): preds.discard(LogicalPredicate.NOT_NULL) + preds.discard(LogicalPredicate.POSITIVE) case Aggregate(): for name, expr in node.keys.items(): ref_expr = 
ColumnReference(name, expr.data_type) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 05d4bb7bd..b00157d0e 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -15,12 +15,11 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discou SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) - FILTER(condition=n_rows > 0:numeric, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': 
o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt index 91e424686..dbc3fc089 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt @@ -1,14 +1,13 @@ ROOT(columns=[('user_name', anything_anything_user_name), ('n_searches', n_searches)], orderings=[(n_searches):desc_last, (anything_anything_user_name):asc_first], limit=4:numeric) AGGREGATE(keys={'anything_user_id': anything_user_id}, aggregations={'anything_anything_user_name': ANYTHING(anything_user_name), 'n_searches': COUNT()}) - FILTER(condition=n_rows > 0:numeric, columns={'anything_user_id': anything_user_id, 'anything_user_name': anything_user_name}) - AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, aggregations={'anything_user_id': ANYTHING(user_id), 'anything_user_name': ANYTHING(user_name), 'n_rows': COUNT()}) - FILTER(condition=name_9 != user_name, columns={'search_id': search_id, 'user_id': user_id, 'user_name': user_name}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - 
JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=EVENTS, columns={'ev_name': ev_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) + AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, aggregations={'anything_user_id': ANYTHING(user_id), 'anything_user_name': ANYTHING(user_name)}) + FILTER(condition=name_9 != user_name, columns={'search_id': search_id, 'user_id': user_id, 'user_name': user_name}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) + 
SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=EVENTS, columns={'ev_name': ev_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 0ce79583c..c5975d83f 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -4,14 +4,13 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric), columns={'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, 
aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric), columns={'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_ansi.sql b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_ansi.sql index 2ec30bec6..335d943d0 100644 --- a/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_ansi.sql +++ b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_ansi.sql @@ -3,11 +3,10 @@ WITH _s0 AS ( user_id, user_name FROM users -), _t2 AS ( +), _t1 AS ( SELECT ANY_VALUE(_s0.user_id) AS anything_user_id, - ANY_VALUE(_s0.user_name) AS anything_user_name, - COUNT(*) AS n_rows + 
ANY_VALUE(_s0.user_name) AS anything_user_name FROM _s0 AS _s0 JOIN searches AS searches ON _s0.user_id = searches.search_user_id @@ -24,9 +23,7 @@ WITH _s0 AS ( SELECT ANY_VALUE(anything_user_name) AS user_name, COUNT(*) AS n_searches -FROM _t2 -WHERE - n_rows > 0 +FROM _t1 GROUP BY anything_user_id ORDER BY diff --git a/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_sqlite.sql b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_sqlite.sql index a31a1ac1d..1f1901bc0 100644 --- a/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_sqlite.sql +++ b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_sqlite.sql @@ -3,11 +3,10 @@ WITH _s0 AS ( user_id, user_name FROM users -), _t2 AS ( +), _t1 AS ( SELECT MAX(_s0.user_id) AS anything_user_id, - MAX(_s0.user_name) AS anything_user_name, - COUNT(*) AS n_rows + MAX(_s0.user_name) AS anything_user_name FROM _s0 AS _s0 JOIN searches AS searches ON _s0.user_id = searches.search_user_id @@ -28,9 +27,7 @@ WITH _s0 AS ( SELECT MAX(anything_user_name) AS user_name, COUNT(*) AS n_searches -FROM _t2 -WHERE - n_rows > 0 +FROM _t1 GROUP BY anything_user_id ORDER BY diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 0c77ea8b5..7b8888ca6 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -16,9 +16,8 @@ WITH _s3 AS ( ON _s3.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' -), _t2 AS ( - SELECT - COUNT(*) AS n_rows, +), _s7 AS ( + SELECT DISTINCT partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 @@ -26,8 +25,6 @@ WITH _s3 AS ( AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) - GROUP BY - partsupp.ps_suppkey ) SELECT supplier.s_name AS S_NAME, @@ -35,8 +32,8 @@ SELECT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _t2 AS _t2 - ON 
_t2.n_rows > 0 AND _t2.ps_suppkey = supplier.s_suppkey +JOIN _s7 AS _s7 + ON _s7.ps_suppkey = supplier.s_suppkey ORDER BY s_name LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index 4ffe16370..cb3029133 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -16,9 +16,8 @@ WITH _s3 AS ( ON _s3.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' -), _t2 AS ( - SELECT - COUNT(*) AS n_rows, +), _s7 AS ( + SELECT DISTINCT partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 @@ -26,8 +25,6 @@ WITH _s3 AS ( AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) - GROUP BY - partsupp.ps_suppkey ) SELECT supplier.s_name AS S_NAME, @@ -35,8 +32,8 @@ SELECT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _t2 AS _t2 - ON _t2.n_rows > 0 AND _t2.ps_suppkey = supplier.s_suppkey +JOIN _s7 AS _s7 + ON _s7.ps_suppkey = supplier.s_suppkey ORDER BY s_name LIMIT 10 From e8a54b8da728eef812244f511bc964932b5816ab Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sat, 19 Jul 2025 01:17:36 -0400 Subject: [PATCH 048/143] Added IFF and KEEP_IF rules --- .../conversion/relational_simplification.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index cceec7bf4..d807853a6 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -193,6 +193,38 @@ def simplify_function_call( output_predicates.add(LogicalPredicate.NOT_NEGATIVE) case pydop.ABSENT: output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.IFF: + if isinstance(expr.inputs[0], LiteralExpression): + if bool(expr.inputs[0].value): + output_expr = expr.inputs[1] + output_predicates = arg_predicates[1] + else: + 
output_expr = expr.inputs[2] + output_predicates = arg_predicates[2] + elif ( + LogicalPredicate.POSITIVE in arg_predicates[0] + and LogicalPredicate.NOT_NULL in arg_predicates[0] + ): + output_expr = expr.inputs[1] + output_predicates = arg_predicates[1] + else: + output_predicates = arg_predicates[1] & arg_predicates[2] + case pydop.KEEP_IF: + if isinstance(expr.inputs[1], LiteralExpression): + if bool(expr.inputs[1].value): + output_expr = expr.inputs[0] + output_predicates = arg_predicates[0] + else: + output_expr = LiteralExpression(None, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + elif ( + LogicalPredicate.POSITIVE in arg_predicates[1] + and LogicalPredicate.NOT_NULL in arg_predicates[1] + ): + output_expr = expr.inputs[0] + output_predicates = arg_predicates[0] + elif LogicalPredicate.NOT_NEGATIVE in arg_predicates[0]: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) return output_expr, output_predicates From b8a7f77ad2a4cbd396805ee9ed2202228b89ef5f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 11:00:48 -0400 Subject: [PATCH 049/143] Added more simplification patterns and tests --- .../conversion/relational_simplification.py | 24 +++++++- tests/test_pipeline_defog_custom.py | 61 +++++++++++++++++++ tests/test_pipeline_pagerank.py | 28 ++++----- tests/test_plan_refsols/simplification_1.txt | 3 + tests/testing_utilities.py | 41 ++++++++----- 5 files changed, 125 insertions(+), 32 deletions(-) create mode 100644 tests/test_plan_refsols/simplification_1.txt diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index d807853a6..70e8ef858 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -108,10 +108,19 @@ def simplify_function_call( case pydop.COUNT | pydop.NDISTINCT: output_predicates.add(LogicalPredicate.NOT_NULL) output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - if not 
no_group_aggregate and ( - len(expr.inputs) == 0 or LogicalPredicate.NOT_NULL in arg_predicates[0] + if not no_group_aggregate: + if ( + len(expr.inputs) == 0 + or LogicalPredicate.NOT_NULL in arg_predicates[0] + ): + output_predicates.add(LogicalPredicate.POSITIVE) + elif ( + expr.op == pydop.COUNT + and len(expr.inputs) == 1 + and LogicalPredicate.NOT_NULL in arg_predicates[0] ): output_predicates.add(LogicalPredicate.POSITIVE) + output_expr = CallExpression(pydop.COUNT, expr.data_type, []) case ( pydop.SUM | pydop.AVG @@ -132,6 +141,17 @@ def simplify_function_call( and not no_group_aggregate ): output_predicates.add(LogicalPredicate.NOT_NULL) + case pydop.ADD | pydop.MUL | pydop.DIV: + for predicate in [LogicalPredicate.NOT_NEGATIVE, LogicalPredicate.POSITIVE]: + if all(predicate in preds for preds in arg_predicates): + output_predicates.add(predicate) + if expr.op == pydop.DIV: + if ( + LogicalPredicate.NOT_NULL in arg_predicates[0] + and LogicalPredicate.NOT_NULL in arg_predicates[1] + and LogicalPredicate.POSITIVE in arg_predicates[1] + ): + output_predicates.add(LogicalPredicate.NOT_NULL) case pydop.DEFAULT_TO: if LogicalPredicate.NOT_NULL in arg_predicates[0]: output_expr = expr.inputs[0] diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index c127a57e4..4e45fa868 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -1587,6 +1587,67 @@ def get_day_of_week( ), id="get_part_single", ), + pytest.param( + PyDoughPandasTest( + "result = Broker.CALCULATE(" + " s00 = ABS(13)," # -> 13 + " s01 = ABS(0)," # -> 0 + " s02 = ABS(COUNT(customers))," # -> COUNT(customers) + " s03 = ABS(COUNT(customers) + 5)," # -> COUNT(customers) + 5 + " s04 = ABS(COUNT(customers) * 2)," # -> COUNT(customers) * 2 + " s05 = ABS(COUNT(customers) / 8.0)," # -> COUNT(customers) / 8.0 + " s06 = DEFAULT_TO(10, 0)," # -> 10 + " s07 = DEFAULT_TO(COUNT(customers), 0)," # -> COUNT(customers) + " s08 = 
DEFAULT_TO(ABS(COUNT(customers) - 25), 0)," # -> ABS(COUNT(customers) - 25) + " s09 = DEFAULT_TO(COUNT(customers) + 1, 0)," # -> COUNT(customers) + 1 + " s10 = DEFAULT_TO(COUNT(customers) - 3, 0)," # -> COUNT(customers) - 3 + " s11 = DEFAULT_TO(COUNT(customers) * -1, 0)," # -> COUNT(customers) * -1 + " s12 = DEFAULT_TO(COUNT(customers) / 2.5, 0)," # -> COUNT(customers) / 2.5 + " s13 = DEFAULT_TO(COUNT(customers) > 10, False)," # -> COUNT(customers) > 10 + " s14 = DEFAULT_TO(COUNT(customers) >= 10, False)," # -> COUNT(customers) >= 10 + " s15 = DEFAULT_TO(COUNT(customers) == 20, False)," # -> COUNT(customers) == 10 + " s16 = DEFAULT_TO(COUNT(customers) != 25, False)," # -> COUNT(customers) != 20 + " s17 = DEFAULT_TO(COUNT(customers) < 25, False)," # -> COUNT(customers) < 25 + " s18 = DEFAULT_TO(COUNT(customers) <= 25, False)," # -> COUNT(customers) <= 25 + " s19 = COUNT(DEFAULT_TO(customers.name, ''))," # -> COUNT(customers) + " s20 = ABS(DEFAULT_TO(AVG(ABS(DEFAULT_TO(LENGTH(customers.name), 0))), 0))," # -> AVG(DEFAULT_TO(LENGTH(customers.name), '')) + " s21 = PRESENT(COUNT(customers))," # -> True + " s22 = PRESENT(1) >= 0," # -> True + " s23 = ABSENT(1) >= 0," # -> True + ")", + "Broker", + lambda: pd.DataFrame( + { + "s00": [13], + "s01": [0], + "s02": [20], + "s03": [25], + "s04": [40], + "s05": [2.5], + "s06": [10], + "s07": [20], + "s08": [5], + "s09": [21], + "s10": [17], + "s11": [-20], + "s12": [8.0], + "s13": [1], + "s14": [1], + "s15": [1], + "s16": [1], + "s17": [1], + "s18": [1], + "s19": [20], + "s20": [12.3], + "s21": [1], + "s22": [1], + "s23": [1], + } + ), + "simplification_1", + ), + id="simplification_1", + ), ], ) def defog_custom_pipeline_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index 8d37255a6..b5e90efd7 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -28,7 +28,7 @@ ), "pagerank_a0", order_sensitive=True, - args=[0], + 
kwargs={"n_iters": 0}, ), id="pagerank_a0", ), @@ -44,7 +44,7 @@ ), "pagerank_a1", order_sensitive=True, - args=[1], + kwargs={"n_iters": 1}, ), id="pagerank_a1", ), @@ -60,7 +60,7 @@ ), "pagerank_a2", order_sensitive=True, - args=[2], + kwargs={"n_iters": 2}, ), id="pagerank_a2", ), @@ -76,7 +76,7 @@ ), "pagerank_a6", order_sensitive=True, - args=[6], + kwargs={"n_iters": 6}, ), id="pagerank_a6", ), @@ -94,7 +94,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[0], + kwargs={"n_iters": 0}, ), id="pagerank_b0", ), @@ -112,7 +112,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[1], + kwargs={"n_iters": 1}, ), id="pagerank_b1", ), @@ -128,7 +128,7 @@ ), "pagerank_b3", order_sensitive=True, - args=[3], + kwargs={"n_iters": 3}, ), id="pagerank_b3", ), @@ -153,7 +153,7 @@ ), "pagerank_c4", order_sensitive=True, - args=[4], + kwargs={"n_iters": 4}, ), id="pagerank_c4", ), @@ -188,7 +188,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[1], + kwargs={"n_iters": 1}, ), id="pagerank_d1", ), @@ -221,7 +221,7 @@ ), "pagerank_d5", order_sensitive=True, - args=[5], + kwargs={"n_iters": 5}, ), id="pagerank_d5", ), @@ -239,7 +239,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[1], + kwargs={"n_iters": 1}, ), id="pagerank_e1", ), @@ -257,7 +257,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[2], + kwargs={"n_iters": 2}, ), id="pagerank_f2", ), @@ -283,7 +283,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[5], + kwargs={"n_iters": 5}, ), id="pagerank_g5", ), @@ -350,7 +350,7 @@ ), "pagerank_h8", order_sensitive=True, - args=[8], + kwargs={"n_iters": 8}, ), id="pagerank_h8", ), diff --git a/tests/test_plan_refsols/simplification_1.txt b/tests/test_plan_refsols/simplification_1.txt new file mode 100644 index 000000000..9a342e6c4 --- /dev/null +++ b/tests/test_plan_refsols/simplification_1.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('s00', 13:numeric), 
('s01', 0:numeric), ('s02', n_rows), ('s03', n_rows + 5:numeric), ('s04', n_rows * 2:numeric), ('s05', n_rows / 8.0:numeric), ('s06', 10:numeric), ('s07', n_rows), ('s08', ABS(n_rows - 25:numeric)), ('s09', n_rows + 1:numeric), ('s10', n_rows - 3:numeric), ('s11', n_rows * -1:numeric), ('s12', n_rows / 2.5:numeric), ('s13', n_rows > 10:numeric), ('s14', n_rows >= 10:numeric), ('s15', n_rows == 20:numeric), ('s16', n_rows != 25:numeric), ('s17', n_rows < 25:numeric), ('s18', n_rows <= 25:numeric), ('s19', n_rows), ('s20', DEFAULT_TO(avg_expr_4, 0:numeric)), ('s21', True:bool), ('s22', True:bool), ('s23', True:bool)], orderings=[]) + AGGREGATE(keys={}, aggregations={'avg_expr_4': AVG(DEFAULT_TO(LENGTH(sbCustName), 0:numeric)), 'n_rows': COUNT()}) + SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index 2b5750cb1..c747cc044 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -904,25 +904,33 @@ def make_relational_ordering( def transform_and_exec_pydough( - pydough_impl: Callable[..., UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode] | str, graph: GraphMetadata, - args: list[Any] | None, + kwargs: dict | None, ) -> UnqualifiedNode: """ Obtains the unqualified node from a PyDough function by invoking the - decorator to transform it, then calling the transformed function. + decorator to transform it (or evaluating the string if provided), then + calling the transformed function. Args: - `pydough_impl`: The PyDough function to be transformed and executed. + `pydough_impl`: The PyDough function to be transformed and executed, + or the string containing the PyDough code to be executed. `graph`: The metadata being used. - `args`: The arguments to pass to the PyDough function, if any. + `kwargs`: The keyword arguments to pass to the PyDough function, if + any. Returns: The unqualified node created by running the transformed version of `pydough_impl`. 
""" - args = args if args is not None else [] - return init_pydough_context(graph)(pydough_impl)(*args) + kwargs = kwargs if kwargs is not None else {} + if isinstance(pydough_impl, str): + # If the pydough_impl is a string, parse it with pydough.from_string. + return pydough.from_string(pydough_impl, metadata=graph, environment=kwargs) + else: + # OTherwise, transform the function with the decorator and call it. + return init_pydough_context(graph)(pydough_impl)(**kwargs) @dataclass @@ -1030,7 +1038,7 @@ class PyDoughPandasTest: a function that returns a Pandas DataFrame. The dataclass contains the following fields: - `pydough_function`: the function that returns the PyDough code evaluated - by the unit test. + by the unit test, or a string representing the PyDough code. - `graph_name`: the name of the graph that the PyDough code will use. - `pd_function`: the function that returns the Pandas DataFrame that should be used as the reference solution. @@ -1049,9 +1057,10 @@ class PyDoughPandasTest: testing. Default is False. """ - pydough_function: Callable[..., UnqualifiedNode] + pydough_function: Callable[..., UnqualifiedNode] | str """ - Function that returns the PyDough code evaluated by the unit test. + Function that returns the PyDough code evaluated by the unit test, or a + string representing the PyDough code. """ graph_name: str @@ -1088,10 +1097,10 @@ class PyDoughPandasTest: same column names as in the reference solution. """ - args: list[Any] | None = None + kwargs: dict | None = None """ - Any additional arguments to pass to the PyDough function when - executing it. If None, no additional arguments are passed. + Any additional keyword arguments to pass to the PyDough function when + executing it. If None, no additional keyword arguments are passed. 
""" skip_relational: bool = False @@ -1132,7 +1141,7 @@ def run_relational_test( # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) root: UnqualifiedNode = transform_and_exec_pydough( - self.pydough_function, graph, self.args + self.pydough_function, graph, self.kwargs ) # Run the PyDough code through the pipeline up until it is converted to @@ -1190,7 +1199,7 @@ def run_sql_test( # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) root: UnqualifiedNode = transform_and_exec_pydough( - self.pydough_function, graph, self.args + self.pydough_function, graph, self.kwargs ) # Convert the PyDough code to SQL text @@ -1235,7 +1244,7 @@ def run_e2e_test( # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) root: UnqualifiedNode = transform_and_exec_pydough( - self.pydough_function, graph, self.args + self.pydough_function, graph, self.kwargs ) # Obtain the DataFrame result from the PyDough code From bc5f383d52efaf51ae48d5658b8d2b8b07393f65 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 11:15:55 -0400 Subject: [PATCH 050/143] Minor refactoring --- tests/test_pipeline_defog_custom.py | 51 ++++++++++++++++++- tests/test_pydough_to_sql.py | 18 ------- .../simplification_1_ansi.sql | 26 ++++++++++ .../simplification_1_sqlite.sql | 26 ++++++++++ 4 files changed, 101 insertions(+), 20 deletions(-) create mode 100644 tests/test_sql_refsols/simplification_1_ansi.sql create mode 100644 tests/test_sql_refsols/simplification_1_sqlite.sql diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index 4e45fa868..9f0a587c0 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -11,7 +11,7 @@ from pydough import init_pydough_context, to_df, to_sql from pydough.configs import DayOfWeek, PyDoughConfigs -from pydough.database_connectors import DatabaseContext +from 
pydough.database_connectors import DatabaseContext, DatabaseDialect from pydough.metadata import GraphMetadata from pydough.unqualified import ( UnqualifiedNode, @@ -145,6 +145,7 @@ def get_day_of_week( {"symbol": ["AAPL", "AMZN", "BRK.B", "FB", "GOOG"]} ), "multi_partition_access_1", + skip_sql=True, ), id="multi_partition_access_1", ), @@ -176,6 +177,7 @@ def get_day_of_week( } ), "multi_partition_access_2", + skip_sql=True, ), id="multi_partition_access_2", ), @@ -212,6 +214,7 @@ def get_day_of_week( } ), "multi_partition_access_3", + skip_sql=True, ), id="multi_partition_access_3", ), @@ -228,6 +231,7 @@ def get_day_of_week( } ), "multi_partition_access_4", + skip_sql=True, ), id="multi_partition_access_4", ), @@ -267,6 +271,7 @@ def get_day_of_week( } ), "multi_partition_access_5", + skip_sql=True, ), id="multi_partition_access_5", ), @@ -300,6 +305,7 @@ def get_day_of_week( } ), "multi_partition_access_6", + skip_sql=True, ), id="multi_partition_access_6", ), @@ -492,6 +498,7 @@ def get_day_of_week( } ), "cumulative_stock_analysis", + skip_sql=True, ), id="cumulative_stock_analysis", ), @@ -516,6 +523,7 @@ def get_day_of_week( } ), "time_threshold_reached", + skip_sql=True, ), id="time_threshold_reached", ), @@ -547,6 +555,7 @@ def get_day_of_week( } ), "hour_minute_day", + skip_sql=True, ), id="hour_minute_day", ), @@ -595,6 +604,7 @@ def get_day_of_week( } ), "exponentiation", + skip_sql=True, ), id="exponentiation", ), @@ -747,6 +757,7 @@ def get_day_of_week( } ), "years_months_days_hours_datediff", + skip_sql=True, ), id="years_months_days_hours_datediff", ), @@ -856,6 +867,7 @@ def get_day_of_week( } ), "minutes_seconds_datediff", + skip_sql=True, ), id="minutes_seconds_datediff", ), @@ -907,6 +919,7 @@ def get_day_of_week( ), ), "padding_functions", + skip_sql=True, ), id="padding_functions", ), @@ -985,6 +998,7 @@ def get_day_of_week( wo_step9=lambda x: x["name"].str[2:2], ), "step_slicing", + skip_sql=True, ), id="step_slicing", ), @@ -1004,6 +1018,7 
@@ def get_day_of_week( sign_high_zero=0, ), "sign", + skip_sql=True, ), id="sign", ), @@ -1026,6 +1041,7 @@ def get_day_of_week( } ), "find", + skip_sql=True, ), id="find", ), @@ -1045,6 +1061,7 @@ def get_day_of_week( } ), "strip", + skip_sql=True, ), id="strip", ), @@ -1074,6 +1091,7 @@ def get_day_of_week( } ), "replace", + skip_sql=True, ), id="replace", ), @@ -1104,6 +1122,7 @@ def get_day_of_week( } ), "str_count", + skip_sql=True, ), id="str_count", ), @@ -1135,6 +1154,7 @@ def get_day_of_week( } ), "get_part_multiple", + skip_sql=True, ), id="get_part_multiple", ), @@ -1408,6 +1428,7 @@ def get_day_of_week( } ), "week_offset", + skip_sql=True, ), id="week_offset", ), @@ -1431,6 +1452,7 @@ def get_day_of_week( } ), "window_sliding_frame_relsize", + skip_sql=True, ), id="window_sliding_frame_relsize", ), @@ -1454,6 +1476,7 @@ def get_day_of_week( } ), "window_sliding_frame_relsum", + skip_sql=True, ), id="window_sliding_frame_relsum", ), @@ -1659,7 +1682,7 @@ def defog_custom_pipeline_test_data(request) -> PyDoughPandasTest: return request.param -def test_pipeline_until_relational_defog( +def test_pipeline_until_relational_defog_custom( defog_custom_pipeline_test_data: PyDoughPandasTest, defog_graphs: graph_fetcher, get_plan_test_filename: Callable[[str], str], @@ -1676,6 +1699,30 @@ def test_pipeline_until_relational_defog( ) +def test_pipeline_until_sql_defog_custom( + defog_custom_pipeline_test_data: PyDoughPandasTest, + defog_graphs: graph_fetcher, + empty_context_database: DatabaseContext, + defog_config: PyDoughConfigs, + get_sql_test_filename: Callable[[str, DatabaseDialect], str], + update_tests: bool, +): + """ + Tests that the PyDough queries from `defog_custom_pipeline_test_data` + generate correct SQL text. 
+ """ + file_path: str = get_sql_test_filename( + defog_custom_pipeline_test_data.test_name, empty_context_database.dialect + ) + defog_custom_pipeline_test_data.run_sql_test( + defog_graphs, + file_path, + update_tests, + empty_context_database, + config=defog_config, + ) + + @pytest.mark.execute def test_pipeline_e2e_defog_custom( defog_custom_pipeline_test_data: PyDoughPandasTest, diff --git a/tests/test_pydough_to_sql.py b/tests/test_pydough_to_sql.py index ab3116296..493087db9 100644 --- a/tests/test_pydough_to_sql.py +++ b/tests/test_pydough_to_sql.py @@ -26,8 +26,6 @@ window_functions, ) from tests.test_pydough_functions.simple_pydough_functions import ( - agg_simplification_1, - agg_simplification_2, cumulative_stock_analysis, datediff, datetime_sampler, @@ -35,7 +33,6 @@ floor_and_ceil, floor_and_ceil_2, get_part_multiple, - get_part_single, global_acctbal_breakdown, hour_minute_day, nation_acctbal_breakdown, @@ -247,18 +244,6 @@ def test_pydough_to_sql_tpch( "Broker", id="week_offset", ), - pytest.param( - agg_simplification_1, - "agg_simplification_1", - "Broker", - id="agg_simplification_1", - ), - pytest.param( - agg_simplification_2, - "agg_simplification_2", - "Broker", - id="agg_simplification_2", - ), pytest.param( cumulative_stock_analysis, "cumulative_stock_analysis", @@ -283,9 +268,6 @@ def test_pydough_to_sql_tpch( "Broker", id="window_sliding_frame_relsum", ), - pytest.param( - get_part_single, "get_part_single", "Broker", id="get_part_single" - ), pytest.param( get_part_multiple, "get_part_multiple", "Broker", id="get_part_multiple" ), diff --git a/tests/test_sql_refsols/simplification_1_ansi.sql b/tests/test_sql_refsols/simplification_1_ansi.sql new file mode 100644 index 000000000..a07916fd6 --- /dev/null +++ b/tests/test_sql_refsols/simplification_1_ansi.sql @@ -0,0 +1,26 @@ +SELECT + 13 AS s00, + 0 AS s01, + COUNT(*) AS s02, + COUNT(*) + 5 AS s03, + COUNT(*) * 2 AS s04, + COUNT(*) / 8.0 AS s05, + 10 AS s06, + COUNT(*) AS s07, + 
ABS(COUNT(*) - 25) AS s08, + COUNT(*) + 1 AS s09, + COUNT(*) - 3 AS s10, + COUNT(*) * -1 AS s11, + COUNT(*) / 2.5 AS s12, + COUNT(*) > 10 AS s13, + COUNT(*) >= 10 AS s14, + COUNT(*) = 20 AS s15, + COUNT(*) <> 25 AS s16, + COUNT(*) < 25 AS s17, + COUNT(*) <= 25 AS s18, + COUNT(*) AS s19, + COALESCE(AVG(COALESCE(LENGTH(sbcustname), 0)), 0) AS s20, + TRUE AS s21, + TRUE AS s22, + TRUE AS s23 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_1_sqlite.sql b/tests/test_sql_refsols/simplification_1_sqlite.sql new file mode 100644 index 000000000..39f87c5f9 --- /dev/null +++ b/tests/test_sql_refsols/simplification_1_sqlite.sql @@ -0,0 +1,26 @@ +SELECT + 13 AS s00, + 0 AS s01, + COUNT(*) AS s02, + COUNT(*) + 5 AS s03, + COUNT(*) * 2 AS s04, + CAST(COUNT(*) AS REAL) / 8.0 AS s05, + 10 AS s06, + COUNT(*) AS s07, + ABS(COUNT(*) - 25) AS s08, + COUNT(*) + 1 AS s09, + COUNT(*) - 3 AS s10, + COUNT(*) * -1 AS s11, + CAST(COUNT(*) AS REAL) / 2.5 AS s12, + COUNT(*) > 10 AS s13, + COUNT(*) >= 10 AS s14, + COUNT(*) = 20 AS s15, + COUNT(*) <> 25 AS s16, + COUNT(*) < 25 AS s17, + COUNT(*) <= 25 AS s18, + COUNT(*) AS s19, + COALESCE(AVG(COALESCE(LENGTH(sbcustname), 0)), 0) AS s20, + TRUE AS s21, + TRUE AS s22, + TRUE AS s23 +FROM main.sbcustomer From a828aa99d858308ec8f0d344fe170746c6e8ba38 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 11:33:18 -0400 Subject: [PATCH 051/143] Fixing double-TPCH error handling --- pydough/unqualified/qualification.py | 3 ++- tests/test_qualification_errors.py | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index 8179436c0..9c74c30b5 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -580,6 +580,7 @@ def qualify_access( if ( isinstance(qualified_parent, GlobalContext) and name == qualified_parent.graph.name + and not is_child ) or ( isinstance(qualified_parent, 
ChildOperatorChildAccess) and isinstance(qualified_parent.child_access, GlobalContext) @@ -1007,7 +1008,7 @@ def qualify_partition( unqualified_parent, None ) qualified_parent: PyDoughCollectionQDAG = self.qualify_collection( - unqualified_parent, context, True, is_cross + unqualified_parent, context, False, is_cross ) qualified_child: PyDoughCollectionQDAG = self.qualify_collection( unqualified_child, qualified_parent, True, is_cross diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index 8446904a0..e40cb94c0 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -180,6 +180,11 @@ "Expected 2 arguments, received 1", id="bad_str_count_few_args", ), + pytest.param( + "result = TPCH.CALCULATE(x=COUNT(TPCH.nations) / COUNT(TPCH.regions))", + "Unrecognized term of TPCH: 'TPCH'. Did you mean: lines, parts, orders, nations, regions?", + id="double_graph", + ), ], ) def test_qualify_error( From 2914f9b4774adabf298300117b8444f17e0d25ee Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 12:22:54 -0400 Subject: [PATCH 052/143] overhauling some of the function call creation and error handling --- .../expression_function_operators.py | 4 +- .../pydough_operators/operator_registry.py | 24 +- pydough/unqualified/qualification.py | 18 +- pydough/unqualified/unqualified_node.py | 233 +++++++++--------- .../window_filter_order_8.txt | 2 +- .../window_filter_order_9.txt | 2 +- tests/test_relational.py | 4 +- tests/test_relational_expressions.py | 4 +- tests/test_unqualified_node.py | 3 +- 9 files changed, 139 insertions(+), 155 deletions(-) diff --git a/pydough/pydough_operators/expression_operators/expression_function_operators.py b/pydough/pydough_operators/expression_operators/expression_function_operators.py index f7fa76fa7..e5b3eaef2 100644 --- a/pydough/pydough_operators/expression_operators/expression_function_operators.py +++ 
b/pydough/pydough_operators/expression_operators/expression_function_operators.py @@ -33,7 +33,7 @@ def __init__( @property def key(self) -> str: - return f"FUNCTION-{self.function_name}" + return self.function_name @property def is_aggregation(self) -> bool: @@ -45,7 +45,7 @@ def function_name(self) -> str: @property def standalone_string(self) -> str: - return f"Function[{self.function_name}]" + return self.function_name def requires_enclosing_parens(self, parent) -> bool: return False diff --git a/pydough/pydough_operators/operator_registry.py b/pydough/pydough_operators/operator_registry.py index 2042bcaf7..dd88b02bc 100644 --- a/pydough/pydough_operators/operator_registry.py +++ b/pydough/pydough_operators/operator_registry.py @@ -11,7 +11,6 @@ from .base_operator import PyDoughOperator from .expression_operators import ( ExpressionFunctionOperator, - KeywordBranchingExpressionFunctionOperator, ) from .expression_operators import registered_expression_operators as REP @@ -58,27 +57,6 @@ def get_operator_by_name(name: str, **kwargs) -> ExpressionFunctionOperator: # Find the operator directly using inspect for op_name, obj in inspect.getmembers(REP): if op_name == name and op_name in REP.__all__ and obj.public: - operator = obj - break + return obj else: raise PyDoughUnqualifiedException(f"Operator {name} not found.") - - # Check if this is a keyword branching operator - if isinstance(operator, KeywordBranchingExpressionFunctionOperator): - # Find the matching implementation based on kwargs - impl: ExpressionFunctionOperator | None = operator.find_matching_implementation( - kwargs - ) - if impl is None: - kwarg_str = ", ".join(f"{k}={v!r}" for k, v in kwargs.items()) - raise PyDoughUnqualifiedException( - f"No matching implementation found for {name}({kwarg_str})." - ) - return impl - elif len(kwargs) > 0: - raise PyDoughUnqualifiedException( - f"PyDough function call {name} does not support " - "keyword arguments at this time." 
- ) - - return operator diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index 9c74c30b5..fa269ad9d 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -8,12 +8,11 @@ from collections.abc import Iterable import pydough +import pydough.pydough_operators as pydop from pydough.configs import PyDoughConfigs from pydough.errors import PyDoughUnqualifiedException from pydough.metadata import GeneralJoinMetadata, GraphMetadata -from pydough.pydough_operators import get_operator_by_name from pydough.pydough_operators.expression_operators import ( - BinOp, ExpressionFunctionOperator, ExpressionWindowOperator, ) @@ -195,10 +194,7 @@ def qualify_binary_operation( goes wrong during the qualification process, e.g. a term cannot be qualified or is not recognized. """ - # Iterate across all the values of the BinOp enum to figure out which - # one correctly matches the BinOp specified by the operator. - operation: str = BinOp.from_string(unqualified._parcel[0]).name - operator = get_operator_by_name(operation) + operator: pydop.BinaryOperator = unqualified._parcel[0] # Independently qualify the LHS and RHS arguments unqualified_lhs: UnqualifiedNode = unqualified._parcel[1] unqualified_rhs: UnqualifiedNode = unqualified._parcel[2] @@ -439,7 +435,6 @@ def qualify_join_condition( The PyDough QDAG object for the qualified expression node for `condition`. """ - operation: str | None = None raw_term: PyDoughQDAG term: PyDoughExpressionQDAG term_name: str @@ -452,8 +447,7 @@ def qualify_join_condition( # qualification of binary operators except with using # `qualify_join_condition` on the inputs instead of # `qualify_expression`. 
- operation = BinOp.from_string(condition._parcel[0]).name - operator = get_operator_by_name(operation) + binop: pydop.BinaryOperator = condition._parcel[0] qualified_lhs: PyDoughExpressionQDAG = self.qualify_join_condition( condition._parcel[1], access, self_name, other_name ) @@ -461,14 +455,14 @@ def qualify_join_condition( condition._parcel[2], access, self_name, other_name ) return self.builder.build_expression_function_call( - operator, [qualified_lhs, qualified_rhs] + binop, [qualified_lhs, qualified_rhs] ) case UnqualifiedOperation(): # For function calls, invoke the same logic as for normal # qualification of function calls except with using # `qualify_join_condition` on the inputs instead of # `qualify_expression`. - operator = condition._parcel[0] + operator: pydop.PyDoughExpressionOperator = condition._parcel[0] unqualified_operands: list[UnqualifiedNode] = condition._parcel[1] qualified_operands: list[PyDoughQDAG] = [] for node in unqualified_operands: @@ -1172,7 +1166,7 @@ def qualify_best( kwargs: dict[str, object] = {"by": by, "allow_ties": allow_ties} if per: kwargs["per"] = per - rank: UnqualifiedNode = UnqualifiedOperator("RANKING")(**kwargs) + rank: UnqualifiedNode = UnqualifiedOperator(pydop.RANKING)(**kwargs) unqualified_cond: UnqualifiedNode = ( (rank == n_best) if n_best == 1 else (rank <= n_best) ) diff --git a/pydough/unqualified/unqualified_node.py b/pydough/unqualified/unqualified_node.py index 2be3d852d..0825d9f05 100644 --- a/pydough/unqualified/unqualified_node.py +++ b/pydough/unqualified/unqualified_node.py @@ -30,7 +30,6 @@ from pydough.errors import PyDoughUnqualifiedException from pydough.errors.error_utils import is_bool, is_integer, is_positive_int, is_string from pydough.metadata import GraphMetadata -from pydough.pydough_operators import get_operator_by_name from pydough.types import ( ArrayType, BooleanType, @@ -125,8 +124,7 @@ def __getitem__(self, key): "PyDough objects are currently not supported to be used as indices in 
Python slices." ) args.append(coerced_elem) - operator = get_operator_by_name("SLICE") - return UnqualifiedOperation(operator, args) + return UnqualifiedOperation(pydop.SLICE, args) else: raise PyDoughUnqualifiedException( f"Cannot index into PyDough object {self} with {key!r}" @@ -144,99 +142,99 @@ def __bool__(self): def __add__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("+", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.ADD, self, other_unqualified) def __radd__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("+", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.ADD, other_unqualified, self) def __sub__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("-", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.SUB, self, other_unqualified) def __rsub__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("-", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.SUB, other_unqualified, self) def __mul__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("*", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.MUL, self, other_unqualified) def __rmul__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("*", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.MUL, other_unqualified, self) def __truediv__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("/", self, other_unqualified) + return 
UnqualifiedBinaryOperation(pydop.DIV, self, other_unqualified) def __rtruediv__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("/", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.DIV, other_unqualified, self) def __pow__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("**", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.POW, self, other_unqualified) def __rpow__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("**", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.POW, other_unqualified, self) def __mod__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("%", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.MOD, self, other_unqualified) def __rmod__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("%", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.MOD, other_unqualified, self) def __eq__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("==", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.EQU, self, other_unqualified) def __ne__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("!=", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.NEQ, self, other_unqualified) def __lt__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("<", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.LET, self, 
other_unqualified) def __le__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("<=", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.LEQ, self, other_unqualified) def __gt__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation(">", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.GRT, self, other_unqualified) def __ge__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation(">=", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.GEQ, self, other_unqualified) def __and__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("&", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.BAN, self, other_unqualified) def __rand__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("&", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.BAN, other_unqualified, self) def __or__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("|", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.BOR, self, other_unqualified) def __ror__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("|", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.BOR, other_unqualified, self) def __xor__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("^", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.BXR, self, other_unqualified) def __rxor__(self, other: object): 
other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("^", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.BXR, other_unqualified, self) def __pos__(self): return self @@ -245,8 +243,7 @@ def __neg__(self): return 0 - self def __invert__(self): - operator = get_operator_by_name("NOT") - return UnqualifiedOperation(operator, [self]) + return UnqualifiedOperation(pydop.NOT, [self]) def CALCULATE(self, *args, **kwargs: dict[str, object]): calc_args: list[tuple[str, UnqualifiedNode]] = [] @@ -268,15 +265,13 @@ def CALCULATE(self, *args, **kwargs: dict[str, object]): return UnqualifiedCalculate(self, calc_args) def __abs__(self): - operator = get_operator_by_name("ABS") - return UnqualifiedOperation(operator, [self]) + return UnqualifiedOperation(pydop.ABS, [self]) def __round__(self, n=None): if n is None: n = 0 n_unqualified = self.coerce_to_unqualified(n) - operator = get_operator_by_name("ROUND") - return UnqualifiedOperation(operator, [self, n_unqualified]) + return UnqualifiedOperation(pydop.ROUND, [self, n_unqualified]) def __floor__(self): raise PyDoughUnqualifiedException( @@ -447,24 +442,23 @@ class UnqualifiedRoot(UnqualifiedNode): """ def __init__(self, graph: GraphMetadata): - self._parcel: tuple[GraphMetadata, set[str]] = ( + func_map: dict[str, pydop.PyDoughOperator] = {} + for operator_name, operator in pydop.builtin_registered_operators().items(): + if not isinstance(operator, pydop.BinaryOperator): + func_map[operator_name] = operator + for operator_name in graph.get_function_names(): + func_map[operator_name] = graph.get_function(operator_name) + self._parcel: tuple[GraphMetadata, dict[str, pydop.PyDoughOperator]] = ( graph, - { - operator_name - for operator_name, operator in pydop.builtin_registered_operators().items() - if not isinstance(operator, pydop.BinaryOperator) - } - | set(graph.get_function_names()), + func_map, ) def __getattribute__(self, name: str) -> Any: - if name 
in super(UnqualifiedNode, self).__getattribute__("_parcel")[1]: - graph: GraphMetadata = super(UnqualifiedNode, self).__getattribute__( - "_parcel" - )[0] - if name in graph.get_function_names(): - return UnqualifiedOperator(name, graph.get_function(name)) - return UnqualifiedOperator(name) + func_map: dict[str, pydop.PyDoughOperator] = super( + UnqualifiedNode, self + ).__getattribute__("_parcel")[1] + if name in func_map: + return UnqualifiedOperator(func_map[name]) else: return super().__getattribute__(name) @@ -593,68 +587,21 @@ class UnqualifiedOperator(UnqualifiedNode): yet to be called. """ - def __init__( - self, name: str, operator: pydop.ExpressionFunctionOperator | None = None - ): - self._parcel: tuple[str, pydop.ExpressionFunctionOperator | None] = ( - name, - operator, - ) + def __init__(self, operator: pydop.PyDoughOperator): + self._parcel: tuple[pydop.PyDoughOperator] = (operator,) def __call__(self, *args, **kwargs): - per: str | None = None - window_operator: pydop.ExpressionWindowOperator - is_window: bool = True - operands: list[UnqualifiedNode] = [] - func_str: str = self._parcel[0] - for arg in args: - operands.append(self.coerce_to_unqualified(arg)) - match func_str: - case "PERCENTILE": - window_operator = pydop.PERCENTILE - is_positive_int.verify( - kwargs.get("n_buckets", 100), "`n_buckets` argument" - ) - case "RANKING": - window_operator = pydop.RANKING - is_bool.verify(kwargs.get("allow_ties", False), "`allow_ties` argument") - is_bool.verify(kwargs.get("dense", False), "`dense` argument") - case "PREV" | "NEXT": - window_operator = pydop.PREV if func_str == "PREV" else pydop.NEXT - is_integer.verify(kwargs.get("n", 1), "`n` argument") - if len(args) > 1: - is_integer.verify(args[1], "`n` argument") - case "RELSUM": - window_operator = pydop.RELSUM - case "RELAVG": - window_operator = pydop.RELAVG - case "RELCOUNT": - window_operator = pydop.RELCOUNT - case "RELSIZE": - window_operator = pydop.RELSIZE - case func_str: - is_window = 
False - if self._parcel[1] is None: - operator = get_operator_by_name(func_str, **kwargs) - else: - operator = self._parcel[1] - if isinstance(operator, pydop.ExpressionWindowOperator): - window_operator = operator - is_window = True - if is_window: - by: Iterable[UnqualifiedNode] = get_by_arg(kwargs, window_operator) - if "per" in kwargs: - per_arg = kwargs.pop("per") - is_string.verify(per_arg, "`per` argument") - per = per_arg - return UnqualifiedWindow( - window_operator, - operands, - by, - per, - kwargs, + operands: list[UnqualifiedNode] = [ + self.coerce_to_unqualified(arg) for arg in args + ] + if isinstance(self._parcel[0], pydop.ExpressionWindowOperator): + return call_window_operator(self._parcel[0], operands, **kwargs) + elif isinstance(self._parcel[0], pydop.ExpressionFunctionOperator): + return call_function_operator(self._parcel[0], operands, **kwargs) + else: + raise NotImplementedError( + f"Unsupported operator type: {self._parcel[0].__class__.__name__}" ) - return UnqualifiedOperation(operator, operands) class UnqualifiedOperation(UnqualifiedNode): @@ -701,8 +648,10 @@ class UnqualifiedBinaryOperation(UnqualifiedNode): Variant of UnqualifiedOperation specifically for builtin Python binops. 
""" - def __init__(self, operator: str, lhs: UnqualifiedNode, rhs: UnqualifiedNode): - self._parcel: tuple[str, UnqualifiedNode, UnqualifiedNode] = ( + def __init__( + self, operator: pydop.BinaryOperator, lhs: UnqualifiedNode, rhs: UnqualifiedNode + ): + self._parcel: tuple[pydop.BinaryOperator, UnqualifiedNode, UnqualifiedNode] = ( operator, lhs, rhs, @@ -868,7 +817,7 @@ def display_raw(unqualified: UnqualifiedNode) -> str: case _: return repr(literal_value) case UnqualifiedOperator(): - return unqualified._parcel[0] + return repr(unqualified._parcel[0]) case UnqualifiedOperation(): operands_str = ", ".join( [display_raw(operand) for operand in unqualified._parcel[1]] @@ -885,7 +834,7 @@ def display_raw(unqualified: UnqualifiedNode) -> str: operands_str += f", {kwarg}={val!r}" return f"{unqualified._parcel[0].function_name}({operands_str})" case UnqualifiedBinaryOperation(): - return f"({display_raw(unqualified._parcel[1])} {unqualified._parcel[0]} {display_raw(unqualified._parcel[2])})" + return f"({display_raw(unqualified._parcel[1])} {unqualified._parcel[0].binop.value} {display_raw(unqualified._parcel[2])})" case UnqualifiedCollation(): method: str = "ASC" if unqualified._parcel[1] else "DESC" pos: str = "'last'" if unqualified._parcel[2] else "'first'" @@ -935,3 +884,67 @@ def display_raw(unqualified: UnqualifiedNode) -> str: raise PyDoughUnqualifiedException( f"Unsupported unqualified node: {unqualified.__class__.__name__}" ) + + +def call_function_operator( + operator: pydop.ExpressionFunctionOperator, + operands: list[UnqualifiedNode], + **kwargs, +) -> UnqualifiedNode: + """ + TODO + """ + + # Check if this is a keyword branching operator + if isinstance(operator, pydop.KeywordBranchingExpressionFunctionOperator): + # Find the matching implementation based on kwargs + impl: pydop.ExpressionFunctionOperator | None = ( + operator.find_matching_implementation(kwargs) + ) + if impl is None: + kwarg_str = ", ".join(f"{k}={v!r}" for k, v in kwargs.items()) + 
raise PyDoughUnqualifiedException( + f"No matching implementation found for {operator.function_name}({kwarg_str})." + ) + operator = impl + + # Otherwise, verify there are no keyword arguments + elif len(kwargs) > 0: + raise PyDoughUnqualifiedException( + f"PyDough function {operator.function_name} does not support " + "keyword arguments at this time." + ) + + return UnqualifiedOperation(operator, operands) + + +def call_window_operator( + operator: pydop.ExpressionWindowOperator, operands: list[UnqualifiedNode], **kwargs +) -> UnqualifiedNode: + """ + TODO + """ + match operator: + case pydop.PERCENTILE: + is_positive_int.verify(kwargs.get("n_buckets", 100), "`n_buckets` argument") + case pydop.RANKING: + is_bool.verify(kwargs.get("allow_ties", False), "`allow_ties` argument") + is_bool.verify(kwargs.get("dense", False), "`dense` argument") + case pydop.PREV | pydop.NEXT: + is_integer.verify(kwargs.get("n", 1), "`n` argument") + if len(operands) > 1: + is_integer.verify(operands[1], "`n` argument") + + by: Iterable[UnqualifiedNode] = get_by_arg(kwargs, operator) + per: str | None = None + if "per" in kwargs: + per_arg = kwargs.pop("per") + is_string.verify(per_arg, "`per` argument") + per = per_arg + return UnqualifiedWindow( + operator, + operands, + by, + per, + kwargs, + ) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt index c244afae3..f566a2e45 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=c_acctbal < RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & ABSENT(n_rows), columns={}) + FILTER(condition=ABSENT(n_rows) & c_acctbal < RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), columns={}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_9.txt b/tests/test_plan_refsols/window_filter_order_9.txt index 26542bd19..fe91ae9f3 100644 --- a/tests/test_plan_refsols/window_filter_order_9.txt +++ b/tests/test_plan_refsols/window_filter_order_9.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[total_spent], partition=[], order=[]) & ABSENT(expr_0), columns={}) + FILTER(condition=ABSENT(expr_0) & o_totalprice < 0.05:numeric * RELAVG(args=[total_spent], partition=[], order=[]), columns={}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_0': t1.expr_0, 'o_totalprice': t0.o_totalprice, 'total_spent': t1.total_spent}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_relational.py b/tests/test_relational.py index 524c7504c..12cc43777 100644 --- a/tests/test_relational.py +++ b/tests/test_relational.py @@ -528,7 +528,7 @@ def test_invalid_limit(literal: LiteralExpression) -> None: ) }, ), - "AGGREGATE(keys={'a': Column(name=a, type=UnknownType())}, aggregations={'b': Call(op=Function[SUM], inputs=[Column(name=b, type=NumericType())], return_type=NumericType())})", + "AGGREGATE(keys={'a': Column(name=a, type=UnknownType())}, aggregations={'b': Call(op=SUM, inputs=[Column(name=b, type=NumericType())], return_type=NumericType())})", id="key_and_agg", 
), pytest.param( @@ -556,7 +556,7 @@ def test_invalid_limit(literal: LiteralExpression) -> None: ), }, ), - "AGGREGATE(keys={}, aggregations={'a': Call(op=Function[SUM], inputs=[Column(name=a, type=NumericType())], return_type=NumericType()), 'b': Call(op=Function[SUM], inputs=[Column(name=b, type=NumericType())], return_type=NumericType())})", + "AGGREGATE(keys={}, aggregations={'a': Call(op=SUM, inputs=[Column(name=a, type=NumericType())], return_type=NumericType()), 'b': Call(op=SUM, inputs=[Column(name=b, type=NumericType())], return_type=NumericType())})", id="no_keys", ), pytest.param( diff --git a/tests/test_relational_expressions.py b/tests/test_relational_expressions.py index c7137a199..535af525b 100644 --- a/tests/test_relational_expressions.py +++ b/tests/test_relational_expressions.py @@ -243,12 +243,12 @@ def test_expression_sort_info_equals( [ pytest.param( CallExpression(LOWER, StringType(), [ColumnReference("a", StringType())]), - "Call(op=Function[LOWER], inputs=[Column(name=a, type=StringType())], return_type=StringType())", + "Call(op=LOWER, inputs=[Column(name=a, type=StringType())], return_type=StringType())", id="lower", ), pytest.param( CallExpression(SUM, NumericType(), [ColumnReference("a", NumericType())]), - "Call(op=Function[SUM], inputs=[Column(name=a, type=NumericType())], return_type=NumericType())", + "Call(op=SUM, inputs=[Column(name=a, type=NumericType())], return_type=NumericType())", id="sum", ), ], diff --git a/tests/test_unqualified_node.py b/tests/test_unqualified_node.py index 1e04d6d29..7c527a44e 100644 --- a/tests/test_unqualified_node.py +++ b/tests/test_unqualified_node.py @@ -740,8 +740,7 @@ def test_init_pydough_context( pytest.param( bad_unsupported_kwarg3, re.escape( - "PyDough function call SUM does not support " - "keyword arguments at this time." + "PyDough function SUM does not support keyword arguments at this time." 
), id="bad_unsupported_kwarg3", ), From 9f0961d0fd677f60e026d10d12ed8f648c10f35f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 12:49:29 -0400 Subject: [PATCH 053/143] Moved function mismatch errors to use min edit distance --- pydough/errors/error_utils.py | 124 +++++++++++++++++ pydough/errors/pydough_error_builder.py | 49 ++++++- .../pydough_operators/operator_registry.py | 24 +--- pydough/qdag/collections/collection_qdag.py | 127 +----------------- pydough/unqualified/unqualified_node.py | 5 +- tests/test_metadata_errors.py | 2 +- tests/test_qualification_errors.py | 12 +- 7 files changed, 195 insertions(+), 148 deletions(-) diff --git a/pydough/errors/error_utils.py b/pydough/errors/error_utils.py index b679fa217..84142b73b 100644 --- a/pydough/errors/error_utils.py +++ b/pydough/errors/error_utils.py @@ -19,6 +19,7 @@ "extract_integer", "extract_object", "extract_string", + "find_possible_name_matches", "is_bool", "is_integer", "is_json_array", @@ -33,6 +34,8 @@ from abc import ABC, abstractmethod +import numpy as np + from .error_types import PyDoughMetadataException ############################################################################### @@ -443,3 +446,124 @@ def extract_object(json_obj: dict, key_name: str, obj_name: str) -> dict: value = json_obj[key_name] assert isinstance(value, dict) return value + + +############################################################################### +# Name Suggestion Utilities +############################################################################### + + +def min_edit_distance(s: str, t: str) -> float: + """ + Computes the minimum edit distance between two strings using the + Levenshtein distance algorithm. Substituting a character for the same + character with different capitalization is considered 10% of the edit + cost of replacing it with any other character. For this implementation + the iterative with a 2-row array is used to save memory. 
+ Link: + https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows + + Args: + `s`: The first string. + `t`: The second string. + + Returns: + The minimum edit distance between the two strings. + """ + # Ensures str1 is the shorter string + if len(s) > len(t): + s, t = t, s + m, n = len(s), len(t) + + # Use a 2 x (m + 1) array to represent an n x (m + 1) array since you only + # need to consider the previous row to generate the next row, therefore the + # same two rows can be recycled + + row, previousRow = 1, 0 + arr = np.zeros((2, m + 1), dtype=float) + + # MED(X, "") = len(X) + arr[0, :] = np.arange(m + 1) + + for i in range(1, n + 1): + # MED("", X) = len(X) + arr[row, 0] = i + + # Loop over the rest of s to see if it matches with the corresponding + # letter of t + for j in range(1, m + 1): + substitution_cost: float + + if s[j - 1] == t[i - 1]: + substitution_cost = 0.0 + elif s[j - 1].lower() == t[i - 1].lower(): + substitution_cost = 0.1 + else: + substitution_cost = 1.0 + + arr[row, j] = min( + arr[row, j - 1] + 1.0, + arr[previousRow, j] + 1.0, + arr[previousRow, j - 1] + substitution_cost, + ) + + row, previousRow = previousRow, row + + return arr[previousRow, m] # Return the last computed row's last element + + +def find_possible_name_matches( + term_name: str, candidates: set[str], atol: int, rtol: float, min_names: int +) -> list[str]: + """ + Finds and returns a list of candidate names that closely match the + given name based on minimum edit distance. + + Args: + `term_name`: The name to match against the list of candidates. + `candidates`: A set of candidate names to search for matches. + `atol`: The absolute tolerance for the minimum edit distance; any + candidate with a minimum edit distance less than or equal to + `closest_match + atol` will be included in the results. 
+ `rtol`: The relative tolerance for the minimum edit distance; any + candidate with a minimum edit distance less than or equal to + `closest_match * (1 + rtol)` will be included in the results. + `min_names`: The minimum number of names to return. + + Returns: + A list of candidate names, based on the closest matches. + """ + + terms_distance_list: list[tuple[float, str]] = [] + + for term in candidates: + # get the minimum edit distance + me: float = min_edit_distance(term_name, term) + terms_distance_list.append((me, term)) + + if terms_distance_list == []: + return [] + # sort the list by minimum edit distance break ties by name + terms_distance_list.sort() + + closest_match = terms_distance_list[0] + + # List with all names that have a me <= closest_match + atol + matches_within_atol: list[str] = [ + name for me, name in terms_distance_list if me <= closest_match[0] + atol + ] + + # List with all names that have a me <= closest_match * 1.1 + matches_within_rtol: list[str] = [ + name for me, name in terms_distance_list if me <= closest_match[0] * (1 + rtol) + ] + + # List with the top 3 closest matches (me) breaking ties by name + min_matches: list[str] = [name for _, name in terms_distance_list[:min_names]] + + # Return whichever of the three lists is the longest, breaking ties + # lexicographically by the names within. 
+ return max( + [matches_within_atol, matches_within_rtol, min_matches], + key=lambda x: (len(x), x), + ) diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py index 0a560b520..c9c0fbbfd 100644 --- a/pydough/errors/pydough_error_builder.py +++ b/pydough/errors/pydough_error_builder.py @@ -4,12 +4,19 @@ from typing import TYPE_CHECKING -from pydough.errors import PyDoughException, PyDoughQDAGException, PyDoughSQLException +from pydough.errors import ( + PyDoughException, + PyDoughQDAGException, + PyDoughSQLException, + PyDoughUnqualifiedException, +) +from pydough.errors.error_utils import find_possible_name_matches if TYPE_CHECKING: from pydough.pydough_operators import PyDoughOperator from pydough.qdag import PyDoughCollectionQDAG, PyDoughExpressionQDAG from pydough.relational import CallExpression + from pydough.unqualified import UnqualifiedNode class PyDoughErrorBuilder: @@ -230,3 +237,43 @@ def sql_call_conversion_error( return PyDoughQDAGException( f"Failed to convert expression {call.to_string(True)} to SQL: {error}" ) + + def undefined_function_call( + self, node: "UnqualifiedNode", *args, **kwargs + ) -> PyDoughException: + """ + Creates an exception for when a function call is made on an unqualified + node that is not callable. + + Args: + `node`: The unqualified node that was called as if it were a + function. + `*args`: Positional arguments passed to the call. + `**kwargs`: Keyword arguments passed to the call. + + Returns: + An exception indicating that the node is not callable. + """ + from pydough.unqualified import UnqualifiedAccess, UnqualifiedRoot + + error_message: str = f"PyDough object {node!r} is not callable." + # If in the form root.XXX, then it is possible that XXXX is a typo of + # a function name. 
+ if isinstance(node, UnqualifiedAccess) and isinstance( + node._parcel[0], UnqualifiedRoot + ): + suggestions: list[str] = find_possible_name_matches( + term_name=node._parcel[1], + candidates=set(node._parcel[0]._parcel[1]), + atol=1, + rtol=0.1, + min_names=3, + ) + + # Check if there are any suggestions to add + if len(suggestions) > 0: + suggestions_str: str = ", ".join(suggestions) + error_message += f" Did you mean: {suggestions_str}?" + else: + error_message += " Did you mean to use a function?" + return PyDoughUnqualifiedException(error_message) diff --git a/pydough/pydough_operators/operator_registry.py b/pydough/pydough_operators/operator_registry.py index dd88b02bc..779f9acb4 100644 --- a/pydough/pydough_operators/operator_registry.py +++ b/pydough/pydough_operators/operator_registry.py @@ -27,36 +27,24 @@ def builtin_registered_operators() -> dict[str, PyDoughOperator]: return operators -def get_operator_by_name(name: str, **kwargs) -> ExpressionFunctionOperator: +def get_operator_by_name(name: str) -> ExpressionFunctionOperator: """ Retrieves a registered PyDough operator by its a name. - This function searches for an operator within the registered expression - operators. If the operator is a `KeywordBranchingExpressionFunctionOperator`, - it will attempt to find a specific implementation that matches the provided - keyword arguments. - Args: name: The name of the operator to retrieve. - **kwargs: Keyword arguments that may be used to select a specific - implementation if the operator is a - `KeywordBranchingExpressionFunctionOperator`. Returns: - The `ExpressionFunctionOperator` corresponding to the given name and - keyword arguments. + The `ExpressionFunctionOperator` corresponding to the given name. 
Raises: - PyDoughUnqualifiedException: If the operator with the given name is - not found, or if no matching implementation is found for a - `KeywordBranchingExpressionFunctionOperator` with the provided - keyword arguments, or if keyword arguments are provided for an - operator that does not support them. + `PyDoughUnqualifiedException`: If the operator with the given name is + not found. """ # Find the operator directly using inspect for op_name, obj in inspect.getmembers(REP): if op_name == name and op_name in REP.__all__ and obj.public: return obj - else: - raise PyDoughUnqualifiedException(f"Operator {name} not found.") + # If not found, raise an exception + raise PyDoughUnqualifiedException(f"Operator {name} not found.") diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index a68ba1468..e5f16077b 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -10,9 +10,8 @@ from functools import cache, cached_property from typing import Union -import numpy as np - import pydough +from pydough.errors.error_utils import find_possible_name_matches from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions.collation_expression import CollationExpression from pydough.qdag.expressions.expression_qdag import PyDoughExpressionQDAG @@ -357,122 +356,6 @@ def to_tree_string(self) -> str: """ return "\n".join(self.to_tree_form(True).to_string_rows()) - def find_possible_name_matches( - self, term_name: str, atol: int, rtol: float, min_names: int - ) -> list[str]: - """ - Finds and returns a list of candidate names that closely match the - given name based on minimum edit distance. - - Args: - `term_name`: The name to match against the list of candidates. - `atol`: The absolute tolerance for the minimum edit distance; any - candidate with a minimum edit distance less than or equal to - `closest_match + atol` will be included in the results. 
- `rtol`: The relative tolerance for the minimum edit distance; any - candidate with a minimum edit distance less than or equal to - `closest_match * (1 + rtol)` will be included in the results. - `min_names`: The minimum number of names to return. - - Returns: - A list of candidate names, based on the closest matches. - """ - - terms_distance_list: list[tuple[float, str]] = [] - - for term in self.all_terms: - # get the minimum edit distance - me: float = self.min_edit_distance(term_name, term) - terms_distance_list.append((me, term)) - - if terms_distance_list == []: - return [] - # sort the list by minimum edit distance break ties by name - terms_distance_list.sort() - - closest_match = terms_distance_list[0] - - # List with all names that have a me <= closest_match + atol - matches_within_atol: list[str] = [ - name for me, name in terms_distance_list if me <= closest_match[0] + atol - ] - - # List with all names that have a me <= closest_match * 1.1 - matches_within_rtol: list[str] = [ - name - for me, name in terms_distance_list - if me <= closest_match[0] * (1 + rtol) - ] - - # List with the top 3 closest matches (me) breaking ties by name - min_matches: list[str] = [name for _, name in terms_distance_list[:min_names]] - - # Return whichever of the three lists is the longest, breaking ties - # lexicographically by the names within. - return max( - [matches_within_atol, matches_within_rtol, min_matches], - key=lambda x: (len(x), x), - ) - - @staticmethod - def min_edit_distance(s: str, t: str) -> float: - """ - Computes the minimum edit distance between two strings using the - Levenshtein distance algorithm. Substituting a character for the same - character with different capitalization is considered 10% of the edit - cost of replacing it with any other character. For this implementation - the iterative with a 2-row array is used to save memory. 
- Link: - https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows - - Args: - `s`: The first string. - `t`: The second string. - - Returns: - The minimum edit distance between the two strings. - """ - # Ensures str1 is the shorter string - if len(s) > len(t): - s, t = t, s - m, n = len(s), len(t) - - # Use a 2 x (m + 1) array to represent an n x (m + 1) array since you only - # need to consider the previous row to generate the next row, therefore the - # same two rows can be recycled - - row, previousRow = 1, 0 - arr = np.zeros((2, m + 1), dtype=float) - - # MED(X, "") = len(X) - arr[0, :] = np.arange(m + 1) - - for i in range(1, n + 1): - # MED("", X) = len(X) - arr[row, 0] = i - - # Loop over the rest of s to see if it matches with the corresponding - # letter of t - for j in range(1, m + 1): - substitution_cost: float - - if s[j - 1] == t[i - 1]: - substitution_cost = 0.0 - elif s[j - 1].lower() == t[i - 1].lower(): - substitution_cost = 0.1 - else: - substitution_cost = 1.0 - - arr[row, j] = min( - arr[row, j - 1] + 1.0, - arr[previousRow, j] + 1.0, - arr[previousRow, j - 1] + substitution_cost, - ) - - row, previousRow = previousRow, row - - return arr[previousRow, m] # Return the last computed row's last element - def name_mismatch_error( self, term_name: str, atol: int = 2, rtol: float = 0.1, min_names: int = 3 ) -> str: @@ -496,8 +379,12 @@ def name_mismatch_error( """ error_message: str = f"Unrecognized term of {self.to_string()}: {term_name!r}." 
- suggestions: list[str] = self.find_possible_name_matches( - term_name=term_name, atol=atol, rtol=rtol, min_names=min_names + suggestions: list[str] = find_possible_name_matches( + term_name=term_name, + candidates=self.all_terms, + atol=atol, + rtol=rtol, + min_names=min_names, ) # Check if there are any suggestions to add diff --git a/pydough/unqualified/unqualified_node.py b/pydough/unqualified/unqualified_node.py index 0825d9f05..4dc1f57cc 100644 --- a/pydough/unqualified/unqualified_node.py +++ b/pydough/unqualified/unqualified_node.py @@ -26,6 +26,7 @@ from datetime import date, datetime from typing import Any, Union +import pydough import pydough.pydough_operators as pydop from pydough.errors import PyDoughUnqualifiedException from pydough.errors.error_utils import is_bool, is_integer, is_positive_int, is_string @@ -131,8 +132,8 @@ def __getitem__(self, key): ) def __call__(self, *args, **kwargs): - raise PyDoughUnqualifiedException( - f"PyDough nodes {self!r} is not callable. Did you mean to use a function?" + raise pydough.active_session.error_builder.undefined_function_call( + self, *args, **kwargs ) def __bool__(self): diff --git a/tests/test_metadata_errors.py b/tests/test_metadata_errors.py index 3a873fc4d..e079b59e3 100644 --- a/tests/test_metadata_errors.py +++ b/tests/test_metadata_errors.py @@ -765,7 +765,7 @@ def test_invalid_graphs( ), pytest.param( "parent.sub4", - "Malformed general join condition: 'is_prime(self.j1) != is_prime(self.j2)' (PyDough nodes is_prime is not callable. Did you mean to use a function?)", + "Malformed general join condition: 'is_prime(self.j1) != is_prime(self.j2)' (PyDough object is_prime is not callable. 
Did you mean: DATETIME, SLICE, ISIN, STRING, STRIP, DAYNAME, KEEP_IF, LIKE, QUANTILE, RELSIZE, REPLACE, SIGN, SUM, PREV, SQRT?)", id="bad_syntax_3", ), pytest.param( diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index e40cb94c0..8d7da66f0 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -29,7 +29,7 @@ ), pytest.param( "result = nations.CALCULATE(nation_name=FIZZBUZZ(name))", - "PyDough nodes FIZZBUZZ is not callable. Did you mean to use a function?", + "PyDough object FIZZBUZZ is not callable. Did you mean: FIND, MINUTE, ABS, COUNT, FLOAT, FLOOR, HOUR, IFF, INTEGER, ISIN, LIKE, MIN, RELCOUNT, RELSIZE, RELSUM, ROUND, SIGN, STRCOUNT, SUM?", id="non_function", ), pytest.param( @@ -54,7 +54,7 @@ ), pytest.param( "lines.CALCULATE(v=MUL(extended_price, SUB(1, discount)))", - "PyDough nodes SUB is not callable. Did you mean to use a function?", + "PyDough object SUB is not callable. Did you mean: SUM, STD, ABS?", id="binop_function_call", ), pytest.param( @@ -122,22 +122,22 @@ ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_VAR(suppliers.account_balance))", - "PyDough nodes SAMPLE_VAR is not callable. Did you mean to use a function?", + "PyDough object SAMPLE_VAR is not callable. Did you mean: MEDIAN, PREV, SMALLEST, UPPER, VAR, YEAR, ABSENT, AVG, DATEDIFF, DATETIME, FLOAT, FLOOR, GETPART, INTEGER, KEEP_IF, LARGEST, LENGTH, LOWER, LPAD, MAX, POWER, PRESENT, QUARTER, RELAVG, REPLACE, RPAD, SECOND, SLICE, SUM?", id="kwargfunc_1", ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_VARIANCE(suppliers.account_balance))", - "PyDough nodes SAMPLE_VARIANCE is not callable. Did you mean to use a function?", + "PyDough object SAMPLE_VARIANCE is not callable. 
Did you mean: MEDIAN, REPLACE, SLICE, DATETIME, JOIN_STRINGS, STRING?", id="kwargfunc_2", ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_STD(suppliers.account_balance))", - "PyDough nodes SAMPLE_STD is not callable. Did you mean to use a function?", + "PyDough object SAMPLE_STD is not callable. Did you mean: SMALLEST, LARGEST, ABSENT?", id="kwargfunc_3", ), pytest.param( "result = nations.CALCULATE(name=name, std=POPULATION_STD(suppliers.account_balance))", - "PyDough nodes POPULATION_STD is not callable. Did you mean to use a function?", + "PyDough object POPULATION_STD is not callable. Did you mean: CONTAINS, COUNT, DEFAULT_TO, JOIN_STRINGS, LARGEST, MONOTONIC, NDISTINCT, ROUND?", id="kwargfunc_4", ), pytest.param( From 2e3030000d78305dd5a79c377025eb370ab94634 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 13:55:19 -0400 Subject: [PATCH 054/143] Adjusting tuning of min edit distance errors --- pydough/errors/error_utils.py | 50 +++++++++++++++------ pydough/errors/pydough_error_builder.py | 17 ++++++- pydough/qdag/collections/collection_qdag.py | 21 ++++++++- tests/test_exploration.py | 4 +- tests/test_metadata_errors.py | 2 +- tests/test_qdag_collection_errors.py | 6 +-- tests/test_qualification_errors.py | 14 +++--- 7 files changed, 85 insertions(+), 29 deletions(-) diff --git a/pydough/errors/error_utils.py b/pydough/errors/error_utils.py index 84142b73b..316cc7afa 100644 --- a/pydough/errors/error_utils.py +++ b/pydough/errors/error_utils.py @@ -453,7 +453,14 @@ def extract_object(json_obj: dict, key_name: str, obj_name: str) -> dict: ############################################################################### -def min_edit_distance(s: str, t: str) -> float: +def min_edit_distance( + s: str, + t: str, + insert_cost: float, + delete_cost: float, + substitution_cost: float, + capital_cost: float, +) -> float: """ Computes the minimum edit distance between two strings using the Levenshtein distance algorithm. 
Substituting a character for the same @@ -466,13 +473,15 @@ def min_edit_distance(s: str, t: str) -> float: Args: `s`: The first string. `t`: The second string. + `insert_cost`: The cost of inserting a character into the first string. + `delete_cost`: The cost of deleting a character from the first string. + `substitution_cost`: The cost of substituting a character. + `capital_cost`: The cost of substituting a character with the same + character with different capitalization. Returns: The minimum edit distance between the two strings. """ - # Ensures str1 is the shorter string - if len(s) > len(t): - s, t = t, s m, n = len(s), len(t) # Use a 2 x (m + 1) array to represent an n x (m + 1) array since you only @@ -492,19 +501,19 @@ def min_edit_distance(s: str, t: str) -> float: # Loop over the rest of s to see if it matches with the corresponding # letter of t for j in range(1, m + 1): - substitution_cost: float + sub_cost: float if s[j - 1] == t[i - 1]: - substitution_cost = 0.0 + sub_cost = 0.0 elif s[j - 1].lower() == t[i - 1].lower(): - substitution_cost = 0.1 + sub_cost = capital_cost else: - substitution_cost = 1.0 + sub_cost = substitution_cost arr[row, j] = min( - arr[row, j - 1] + 1.0, - arr[previousRow, j] + 1.0, - arr[previousRow, j - 1] + substitution_cost, + arr[row, j - 1] + insert_cost, + arr[previousRow, j] + delete_cost, + arr[previousRow, j - 1] + sub_cost, ) row, previousRow = previousRow, row @@ -513,7 +522,15 @@ def min_edit_distance(s: str, t: str) -> float: def find_possible_name_matches( - term_name: str, candidates: set[str], atol: int, rtol: float, min_names: int + term_name: str, + candidates: set[str], + atol: int, + rtol: float, + min_names: int, + insert_cost: float, + delete_cost: float, + substitution_cost: float, + capital_cost: float, ) -> list[str]: """ Finds and returns a list of candidate names that closely match the @@ -529,6 +546,11 @@ def find_possible_name_matches( candidate with a minimum edit distance less than or equal to 
`closest_match * (1 + rtol)` will be included in the results. `min_names`: The minimum number of names to return. + `insert_cost`: The cost of inserting a character into the first string. + `delete_cost`: The cost of deleting a character from the first string. + `substitution_cost`: The cost of substituting a character. + `capital_cost`: The cost of substituting a character with the same + character with different capitalization. Returns: A list of candidate names, based on the closest matches. @@ -538,7 +560,9 @@ def find_possible_name_matches( for term in candidates: # get the minimum edit distance - me: float = min_edit_distance(term_name, term) + me: float = min_edit_distance( + term_name, term, insert_cost, delete_cost, substitution_cost, capital_cost + ) terms_distance_list.append((me, term)) if terms_distance_list == []: diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py index c9c0fbbfd..9582f0295 100644 --- a/pydough/errors/pydough_error_builder.py +++ b/pydough/errors/pydough_error_builder.py @@ -42,7 +42,16 @@ def term_not_found( An exception indicating that the term was not found. 
""" return PyDoughQDAGException( - collection.name_mismatch_error(term_name, atol=2, rtol=0.1, min_names=3) + collection.name_mismatch_error( + term_name, + atol=2, + rtol=0.1, + min_names=3, + insert_cost=0.5, + delete_cost=1.0, + substitution_cost=1.0, + capital_cost=0.1, + ) ) def down_streaming_conflict( @@ -265,9 +274,13 @@ def undefined_function_call( suggestions: list[str] = find_possible_name_matches( term_name=node._parcel[1], candidates=set(node._parcel[0]._parcel[1]), - atol=1, + atol=2, rtol=0.1, min_names=3, + insert_cost=0.5, + delete_cost=1.0, + substitution_cost=1.0, + capital_cost=0.1, ) # Check if there are any suggestions to add diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index e5f16077b..ede9ab58b 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -357,7 +357,15 @@ def to_tree_string(self) -> str: return "\n".join(self.to_tree_form(True).to_string_rows()) def name_mismatch_error( - self, term_name: str, atol: int = 2, rtol: float = 0.1, min_names: int = 3 + self, + term_name: str, + atol: int = 2, + rtol: float = 0.1, + min_names: int = 3, + insert_cost: float = 1.0, + delete_cost: float = 1.0, + substitution_cost: float = 1.0, + capital_cost: float = 0.1, ) -> str: """ Raises a name mismatch error with suggestions if possible. @@ -373,6 +381,13 @@ def name_mismatch_error( names with a minimum edit distance less than or equal to `closest_match * (1 + rtol)` will be included as a suggestion. `min_names`: The minimum number of suggestions to include. + `insert_cost`: The cost of inserting a character into the first + string. + `delete_cost`: The cost of deleting a character from the first + string. + `substitution_cost`: The cost of substituting a character. + `capital_cost`: The cost of substituting a character with the same + character with different capitalization. 
Returns: A string describing the error, including suggestions if available. @@ -385,6 +400,10 @@ def name_mismatch_error( atol=atol, rtol=rtol, min_names=min_names, + insert_cost=insert_cost, + delete_cost=delete_cost, + substitution_cost=substitution_cost, + capital_cost=capital_cost, ) # Check if there are any suggestions to add diff --git a/tests/test_exploration.py b/tests/test_exploration.py index e74e11ef1..1dd10ef21 100644 --- a/tests/test_exploration.py +++ b/tests/test_exploration.py @@ -1222,13 +1222,13 @@ def test_graph_structure( "TPCH", contextless_collections_impl, """ -Unrecognized term of TPCH: 'line_items'. Did you mean: lines, nations, regions, suppliers? +Unrecognized term of TPCH: 'line_items'. Did you mean: lines, parts, regions? This could mean you accessed a property using a name that does not exist, or that you need to place your PyDough code into a context for it to make sense. Did you mean to use pydough.explain_term? """, """ -Unrecognized term of TPCH: 'line_items'. Did you mean: lines, nations, regions, suppliers? +Unrecognized term of TPCH: 'line_items'. Did you mean: lines, parts, regions? This could mean you accessed a property using a name that does not exist, or that you need to place your PyDough code into a context for it to make sense. Did you mean to use pydough.explain_term? diff --git a/tests/test_metadata_errors.py b/tests/test_metadata_errors.py index e079b59e3..3214fb8fb 100644 --- a/tests/test_metadata_errors.py +++ b/tests/test_metadata_errors.py @@ -765,7 +765,7 @@ def test_invalid_graphs( ), pytest.param( "parent.sub4", - "Malformed general join condition: 'is_prime(self.j1) != is_prime(self.j2)' (PyDough object is_prime is not callable. Did you mean: DATETIME, SLICE, ISIN, STRING, STRIP, DAYNAME, KEEP_IF, LIKE, QUANTILE, RELSIZE, REPLACE, SIGN, SUM, PREV, SQRT?)", + "Malformed general join condition: 'is_prime(self.j1) != is_prime(self.j2)' (PyDough object is_prime is not callable. 
Did you mean: ISIN, LIKE, SUM, SLICE, STRIP, IFF, MIN, VAR, PREV, SIGN, SQRT, STRING, ABS, CEIL, FIND, HAS, HOUR, LPAD, RPAD, STD, YEAR, UPPER, DATETIME?)", id="bad_syntax_3", ), pytest.param( diff --git a/tests/test_qdag_collection_errors.py b/tests/test_qdag_collection_errors.py index 3ba8a92dd..7c0d9c72c 100644 --- a/tests/test_qdag_collection_errors.py +++ b/tests/test_qdag_collection_errors.py @@ -29,12 +29,12 @@ [ pytest.param( TableCollectionInfo("Rainbows"), - "Unrecognized term of TPCH: 'Rainbows'. Did you mean: lines, nations, regions, parts, orders?", + "Unrecognized term of TPCH: 'Rainbows'. Did you mean: lines, nations, regions, parts", id="table_dne", ), pytest.param( TableCollectionInfo("regions") ** SubCollectionInfo("postage_stamps"), - "Unrecognized term of TPCH.regions: 'postage_stamps'. Did you mean: comment, nations, name, key?", + "Unrecognized term of TPCH.regions: 'postage_stamps'. Did you mean: name, comment, key, nations?", id="subcollection_dne", ), pytest.param( @@ -47,7 +47,7 @@ TableCollectionInfo("nations") ** SubCollectionInfo("suppliers") ** CalculateInfo([], foo=ReferenceInfo("region_key")), - "Unrecognized term of TPCH.nations.suppliers: 'region_key'. Did you mean: nation_key, key, lines?", + "Unrecognized term of TPCH.nations.suppliers: 'region_key'. Did you mean: nation_key, key, lines, phone, nation?", id="reference_bad_ancestry", ), pytest.param( diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index 8d7da66f0..de266738c 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -24,12 +24,12 @@ [ pytest.param( "result = nations.CALCULATE(nation_name=name, total_balance=SUM(account_balance))", - "Unrecognized term of TPCH.nations: 'account_balance'. Did you mean: comment, customers, name, region_key, suppliers, region?", + "Unrecognized term of TPCH.nations: 'account_balance'. 
Did you mean: name, comment, key, region, customers, region_key?", id="bad_name", ), pytest.param( "result = nations.CALCULATE(nation_name=FIZZBUZZ(name))", - "PyDough object FIZZBUZZ is not callable. Did you mean: FIND, MINUTE, ABS, COUNT, FLOAT, FLOOR, HOUR, IFF, INTEGER, ISIN, LIKE, MIN, RELCOUNT, RELSIZE, RELSUM, ROUND, SIGN, STRCOUNT, SUM?", + "PyDough object FIZZBUZZ is not callable. Did you mean: FIND, ABS, MIN, SUM, HOUR, IFF, LIKE, MINUTE, SIGN, AVG, CEIL, COUNT, DAY, FLOAT, FLOOR, HAS, ISIN, MAX, NOT, ROUND, STD, VAR, LPAD, NEXT, PREV, RELSUM, RPAD, SLICE, SQRT, YEAR?", id="non_function", ), pytest.param( @@ -54,7 +54,7 @@ ), pytest.param( "lines.CALCULATE(v=MUL(extended_price, SUB(1, discount)))", - "PyDough object SUB is not callable. Did you mean: SUM, STD, ABS?", + "PyDough object SUB is not callable. Did you mean: SUM, STD, ABS, AVG, DAY, HAS, HOUR, IFF, ISIN, MAX, MIN, NOT, SIGN, SQRT, VAR?", id="binop_function_call", ), pytest.param( @@ -122,22 +122,22 @@ ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_VAR(suppliers.account_balance))", - "PyDough object SAMPLE_VAR is not callable. Did you mean: MEDIAN, PREV, SMALLEST, UPPER, VAR, YEAR, ABSENT, AVG, DATEDIFF, DATETIME, FLOAT, FLOOR, GETPART, INTEGER, KEEP_IF, LARGEST, LENGTH, LOWER, LPAD, MAX, POWER, PRESENT, QUARTER, RELAVG, REPLACE, RPAD, SECOND, SLICE, SUM?", + "PyDough object SAMPLE_VAR is not callable. Did you mean: YEAR, SUM, UPPER, VAR, AVG, LPAD, PREV, RPAD, DAY, FLOAT, FLOOR, HAS, LOWER, MAX, POWER, SLICE, SMALLEST, SQRT, STD, ABS, CEIL, GETPART, HOUR, LIKE, MEDIAN, NEXT, QUARTER, RELAVG, REPLACE, SECOND, SIGN, STRIP?", id="kwargfunc_1", ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_VARIANCE(suppliers.account_balance))", - "PyDough object SAMPLE_VARIANCE is not callable. Did you mean: MEDIAN, REPLACE, SLICE, DATETIME, JOIN_STRINGS, STRING?", + "PyDough object SAMPLE_VARIANCE is not callable. 
Did you mean: SLICE, REPLACE, MEDIAN, SIGN, STRING, YEAR, ISIN, MIN, STRIP, SUM, UPPER, VAR?", id="kwargfunc_2", ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_STD(suppliers.account_balance))", - "PyDough object SAMPLE_STD is not callable. Did you mean: SMALLEST, LARGEST, ABSENT?", + "PyDough object SAMPLE_STD is not callable. Did you mean: SMALLEST, STD, HAS, LARGEST, SUM, ABS, LPAD, NEXT, RPAD, SECOND, SQRT, ABSENT, DAY, FLOAT, MAX, NOT, SLICE, UPPER, VAR?", id="kwargfunc_3", ), pytest.param( "result = nations.CALCULATE(name=name, std=POPULATION_STD(suppliers.account_balance))", - "PyDough object POPULATION_STD is not callable. Did you mean: CONTAINS, COUNT, DEFAULT_TO, JOIN_STRINGS, LARGEST, MONOTONIC, NDISTINCT, ROUND?", + "COUNT, ROUND, CONTAINS, FIND, LPAD, RPAD, FLOAT, HAS, MIN, MONTH, NOT, STD, HASNOT, HOUR, ISIN, MINUTE, SECOND, SIGN, ABS, DAY, DEFAULT_TO, FLOOR, LARGEST, MAX, MONOTONIC, NDISTINCT, POWER, PRESENT, QUANTILE, RELCOUNT, REPLACE, SLICE, STRING, SUM, VAR?", id="kwargfunc_4", ), pytest.param( From 75c3b7c06cfee0ff85c5fc7a42bcaf4cfdf494c6 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 14:22:21 -0400 Subject: [PATCH 055/143] Messing with function handling, VARIANCE name, error tuning --- pydough/pydough_operators/__init__.py | 8 +-- .../expression_operators/README.md | 6 +-- .../expression_operators/__init__.py | 8 +-- .../registered_expression_operators.py | 8 +-- .../collections/augmenting_child_operator.py | 1 - .../base_transform_bindings.py | 4 +- tests/test_pipeline_tpch_custom.py | 49 ++++++++++--------- tests/test_plan_refsols/simple_var_std.txt | 2 +- .../simple_var_std_with_nulls.txt | 2 +- .../all_pydough_functions_dialects.py | 6 ++- tests/test_qualification_errors.py | 4 +- .../aggregation_functions_ansi.sql | 10 ++-- .../aggregation_functions_sqlite.sql | 43 ++++++++++++---- 13 files changed, 91 insertions(+), 60 deletions(-) diff --git a/pydough/pydough_operators/__init__.py 
b/pydough/pydough_operators/__init__.py index f3194928d..42f7f384e 100644 --- a/pydough/pydough_operators/__init__.py +++ b/pydough/pydough_operators/__init__.py @@ -67,7 +67,7 @@ "NOT", "PERCENTILE", "POPULATION_STD", - "POPULATION_VARIANCE", + "POPULATION_VAR", "POW", "POWER", "PRESENT", @@ -89,7 +89,7 @@ "RequireMinArgs", "RequireNumArgs", "SAMPLE_STD", - "SAMPLE_VARIANCE", + "SAMPLE_VAR", "SECOND", "SIGN", "SLICE", @@ -171,7 +171,7 @@ NOT, PERCENTILE, POPULATION_STD, - POPULATION_VARIANCE, + POPULATION_VAR, POW, POWER, PRESENT, @@ -187,7 +187,7 @@ ROUND, RPAD, SAMPLE_STD, - SAMPLE_VARIANCE, + SAMPLE_VAR, SECOND, SIGN, SLICE, diff --git a/pydough/pydough_operators/expression_operators/README.md b/pydough/pydough_operators/expression_operators/README.md index 6bd9cd368..f3a7a4b98 100644 --- a/pydough/pydough_operators/expression_operators/README.md +++ b/pydough/pydough_operators/expression_operators/README.md @@ -22,7 +22,7 @@ The expression_operators module provides functionality to define and manage vari ### [keyword_branching_operators.py](keyword_branching_operators.py) -- `KeywordBranchingExpressionFunctionOperator`: Implementation class for PyDough operators that return an `ExpressionFunctionOperator` and represent a function call that supports keyword arguments, such as `VAR` or `STD`. For example, `VAR` can be set with the keyword argument `type="population"` or `type="sample"`, thereby creating two different operators, `POPULATION_VARIANCE` and `SAMPLE_VARIANCE`. +- `KeywordBranchingExpressionFunctionOperator`: Implementation class for PyDough operators that return an `ExpressionFunctionOperator` and represent a function call that supports keyword arguments, such as `VAR` or `STD`. For example, `VAR` can be set with the keyword argument `type="population"` or `type="sample"`, thereby creating two different operators, `POPULATION_VAR` and `SAMPLE_VAR`. 
### [binary_operators.py](binary_operators.py) @@ -145,9 +145,9 @@ These functions can be called on plural data to aggregate it into a singular exp - `NDISTINCT`: counts how many unique values exist in a plural expression (special: see collection aggregations). - `VAR`: the basic operation for variance, which is used to create the other variance functions with different types of keyword arguments. Note: `VAR` is not a valid PyDough function operator, but it is used internally to represent the basic variance operation. - `STD`: the basic operation for standard deviation, which is used to create the other standard deviation functions with different types of keyword arguments. Note: `STD` is not a valid PyDough function operator, but it is used internally to represent the basic standard deviation operation. -- `SAMPLE_VARIANCE`: returns the sample variance of the values of a plural expression. +- `SAMPLE_VAR`: returns the sample variance of the values of a plural expression. - `SAMPLE_STD`: returns the sample standard deviation of the values of a plural expression. -- `POPULATION_VARIANCE`: returns the population variance of the values of a plural expression. +- `POPULATION_VAR`: returns the population variance of the values of a plural expression. - `POPULATION_STD`: returns the population standard deviation of the values of a plural expression. 
##### Collection Aggregations diff --git a/pydough/pydough_operators/expression_operators/__init__.py b/pydough/pydough_operators/expression_operators/__init__.py index e631b3f9d..97698c6e8 100644 --- a/pydough/pydough_operators/expression_operators/__init__.py +++ b/pydough/pydough_operators/expression_operators/__init__.py @@ -64,7 +64,7 @@ "NOT", "PERCENTILE", "POPULATION_STD", - "POPULATION_VARIANCE", + "POPULATION_VAR", "POW", "POWER", "PRESENT", @@ -81,7 +81,7 @@ "ROUND", "RPAD", "SAMPLE_STD", - "SAMPLE_VARIANCE", + "SAMPLE_VAR", "SECOND", "SIGN", "SLICE", @@ -163,7 +163,7 @@ NOT, PERCENTILE, POPULATION_STD, - POPULATION_VARIANCE, + POPULATION_VAR, POW, POWER, PRESENT, @@ -179,7 +179,7 @@ ROUND, RPAD, SAMPLE_STD, - SAMPLE_VARIANCE, + SAMPLE_VAR, SECOND, SIGN, SLICE, diff --git a/pydough/pydough_operators/expression_operators/registered_expression_operators.py b/pydough/pydough_operators/expression_operators/registered_expression_operators.py index b882a5c23..b03cf5e4f 100644 --- a/pydough/pydough_operators/expression_operators/registered_expression_operators.py +++ b/pydough/pydough_operators/expression_operators/registered_expression_operators.py @@ -58,7 +58,7 @@ "NOT", "PERCENTILE", "POPULATION_STD", - "POPULATION_VARIANCE", + "POPULATION_VAR", "POW", "POWER", "PRESENT", @@ -74,7 +74,7 @@ "ROUND", "RPAD", "SAMPLE_STD", - "SAMPLE_VARIANCE", + "SAMPLE_VAR", "SECOND", "SIGN", "SLICE", @@ -286,8 +286,8 @@ kwarg_defaults={"type": "population"}, ) # Define VAR with keyword branching for "type" which is represented internally. 
-POPULATION_VARIANCE = VAR.with_kwarg("POPULATION_VARIANCE", {"type": "population"}) -SAMPLE_VARIANCE = VAR.with_kwarg("SAMPLE_VARIANCE", {"type": "sample"}) +POPULATION_VAR = VAR.with_kwarg("POPULATION_VAR", {"type": "population"}) +SAMPLE_VAR = VAR.with_kwarg("SAMPLE_VAR", {"type": "sample"}) # Define STD with keyword branching STD = KeywordBranchingExpressionFunctionOperator( diff --git a/pydough/qdag/collections/augmenting_child_operator.py b/pydough/qdag/collections/augmenting_child_operator.py index c5f783026..6f8c1fcdc 100644 --- a/pydough/qdag/collections/augmenting_child_operator.py +++ b/pydough/qdag/collections/augmenting_child_operator.py @@ -87,7 +87,6 @@ def get_term(self, term_name: str) -> PyDoughQDAG: term = Reference(self.preceding_context, term_name) return term - @cache def to_string(self) -> str: return f"{self.preceding_context.to_string()}.{self.standalone_string}" diff --git a/pydough/sqlglot/transform_bindings/base_transform_bindings.py b/pydough/sqlglot/transform_bindings/base_transform_bindings.py index 716eeeccb..8f247ce6f 100644 --- a/pydough/sqlglot/transform_bindings/base_transform_bindings.py +++ b/pydough/sqlglot/transform_bindings/base_transform_bindings.py @@ -231,9 +231,9 @@ def convert_call_to_sqlglot( return self.convert_monotonic(args, types) case pydop.SQRT: return self.convert_sqrt(args, types) - case pydop.POPULATION_VARIANCE: + case pydop.POPULATION_VAR: return self.convert_variance(args, types, "population") - case pydop.SAMPLE_VARIANCE: + case pydop.SAMPLE_VAR: return self.convert_variance(args, types, "sample") case pydop.POPULATION_STD: return self.convert_std(args, types, "population") diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 539224e92..5be88dd60 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2965,13 +2965,13 @@ def test_pipeline_e2e_tpch_custom( pytest.param( simple_scan, [], - "Column selection must not be empty", + 
"Expected `columns` argument to be a non-empty list", id="bad_columns_1", ), pytest.param( simple_scan, {}, - "Column selection must not be empty", + "Expected `columns` argument to be a non-empty dictionary", id="bad_columns_2", ), pytest.param( @@ -3000,7 +3000,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_1, None, re.escape( - "Unrecognized term of TPCH.customers: 'c_name'. Did you mean: name, comment, phone?" + "Unrecognized term of TPCH.customers: 'c_name'. Did you mean: name, key, phone?" ), id="bad_name_1", ), @@ -3016,7 +3016,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_3, None, re.escape( - "Unrecognized term of TPCH.CALCULATE(foo=1, bar=2, fizz=3, BUZZ=4): 'fizzbuzz'. Did you mean: fizz, BUZZ, bar?" + "Unrecognized term of TPCH.CALCULATE(foo=1, bar=2, fizz=3, BUZZ=4): 'fizzbuzz'. Did you mean: fizz, BUZZ, foo?" ), id="bad_name_3", ), @@ -3024,7 +3024,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_4, None, re.escape( - "Unrecognized term of TPCH.customers.orders: 'totalPrice'. Did you mean: total_price, clerk, lines?" + "Unrecognized term of TPCH.customers.orders: 'totalPrice'. Did you mean: total_price, clerk, key?" ), id="bad_name_4", ), @@ -3032,7 +3032,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_5, None, re.escape( - "Unrecognized term of TPCH.customers.orders: 'c_name'. Did you mean: clerk, comment, customer, lines, key, order_date?" + "Unrecognized term of TPCH.customers.orders: 'c_name'. Did you mean: key, lines, clerk, comment, customer?" ), id="bad_name_5", ), @@ -3040,7 +3040,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_6, None, re.escape( - "Unrecognized term of TPCH.customers: 'suppliers'. Did you mean: orders, address, phone, comment, key, name, nation?" + "Unrecognized term of TPCH.customers: 'suppliers'. Did you mean: orders, key, name, address, phone, nation?" ), id="bad_name_6", ), @@ -3056,7 +3056,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_8, None, re.escape( - "Unrecognized term of TPCH.customers: 'n123ame'. 
Did you mean: name, nation, phone?" + "Unrecognized term of TPCH.customers: 'n123ame'. Did you mean: name, key, phone?" ), id="bad_name_8", ), @@ -3064,7 +3064,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_9, None, re.escape( - "Unrecognized term of TPCH.customers: '__phone__'. Did you mean: phone, nation, address?" + "Unrecognized term of TPCH.customers: '__phone__'. Did you mean: phone, key, name?" ), id="bad_name_9", ), @@ -3096,7 +3096,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_13, None, re.escape( - "Unrecognized term of TPCH.customers: 'thisisareallylargename_that_exceeds_the_system_limit'. Did you mean: market_segment, account_balance, nation_key, address?" + "Unrecognized term of TPCH.customers: 'thisisareallylargename_that_exceeds_the_system_limit'. Did you mean: market_segment, name, orders, address, key, phone, nation, nation_key?" ), id="bad_name_13", ), @@ -3104,7 +3104,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_14, None, re.escape( - "Unrecognized term of TPCH.customers: 'keyname'. Did you mean: name, key, phone?" + "Unrecognized term of TPCH.customers: 'keyname'. Did you mean: key, name, phone?" ), id="bad_name_14", ), @@ -3112,7 +3112,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_15, None, re.escape( - "Unrecognized term of TPCH.customers: 'namekey'. Did you mean: name, key, nation, nation_key?" + "Unrecognized term of TPCH.customers: 'namekey'. Did you mean: name, key, nation?" ), id="bad_name_15", ), @@ -3120,7 +3120,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_16, None, re.escape( - "Unrecognized term of TPCH.customers: 'no_exist'. Did you mean: comment, name, nation, orders, address, key, phone?" + "Unrecognized term of TPCH.customers: 'no_exist'. Did you mean: name, key, comment, nation, orders, phone, address?" 
), id="bad_name_16", ), @@ -3136,7 +3136,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_18, None, re.escape( - "Unrecognized term of TPCH.Partition(orders.CALCULATE(year=YEAR(order_date)), name='years', by=year).CALCULATE(n_orders=COUNT(orders)).orders: 'nords'. Did you mean: n_orders, lines, clerk, key, year?" + "Unrecognized term of TPCH.Partition(orders.CALCULATE(year=YEAR(order_date)), name='years', by=year).CALCULATE(n_orders=COUNT(orders)).orders: 'nords'. Did you mean: n_orders, key, lines, year, clerk?" ), id="bad_name_18", ), @@ -3152,7 +3152,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_20, None, re.escape( - "Unrecognized term of TPCH.Partition(orders.CALCULATE(year=YEAR(order_date)), name='years', by=year).CALCULATE(n_orders=COUNT(orders)).orders: 'orders'. Did you mean: n_orders, clerk, lines?" + "Unrecognized term of TPCH.Partition(orders.CALCULATE(year=YEAR(order_date)), name='years', by=year).CALCULATE(n_orders=COUNT(orders)).orders: 'orders'. Did you mean: n_orders, clerk, key, lines, year?" ), id="bad_name_20", ), @@ -3168,7 +3168,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_22, None, re.escape( - "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'Over_Intellectual_Ization'. Did you mean: over_intellect_ualiz_ation, OVERIN_tellectualizers, De_Institutionalizations?" + "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'Over_Intellectual_Ization'. Did you mean: over_intellect_ualiz_ation, OVERIN_tellectualizers, PROFESSION_alization?" 
), id="bad_name_22", ), @@ -3176,7 +3176,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_23, None, re.escape( - "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'paio_eo_aliz_ation'. Did you mean: PROFESSION_alization, over_intellect_ualiz_ation, anthro_pomorph_IZATION?" + "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'paio_eo_aliz_ation'. Did you mean: PROFESSION_alization, nations, parts, regions?" ), id="bad_name_23", ), @@ -3184,7 +3184,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_24, None, re.escape( - "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): '_a_r_h_x_n_t_p_o_q__z_m_o_p_i__a_o_n_z_'. Did you mean: anthro_pomorph_IZATION, over_intellect_ualiz_ation, De_Institutionalizations?" + "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): '_a_r_h_x_n_t_p_o_q__z_m_o_p_i__a_o_n_z_'. Did you mean: nations, parts, anthro_pomorph_IZATION, lines, regions?" ), id="bad_name_24", ), @@ -3192,7 +3192,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_25, None, re.escape( - "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'anthropomorphization_and_overintellectualization_and_ultrarevolutionaries'. 
Did you mean: over_intellect_ualiz_ation, OVERIN_tellectualizers, anthro_pomorph_IZATION, ultra_revolution_aries, De_Institutionalizations?" + "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'anthropomorphization_and_overintellectualization_and_ultrarevolutionaries'. Did you mean: over_intellect_ualiz_ation, anthro_pomorph_IZATION, OVERIN_tellectualizers, ultra_revolution_aries?" ), id="bad_name_25", ), @@ -3226,7 +3226,7 @@ def test_pipeline_e2e_tpch_custom( bad_cross_5, None, re.escape( - "Unrecognized term of TPCH.regions.CALCULATE(name=name).TPCH.regions.CALCULATE(name=name): 'regions'. Did you mean: nations, comment, key?" + "Unrecognized term of TPCH.regions.CALCULATE(name=name).TPCH.regions.CALCULATE(name=name): 'regions'. Did you mean: nations, key, name?" ), id="bad_cross_5", ), @@ -3234,11 +3234,12 @@ def test_pipeline_e2e_tpch_custom( bad_cross_6, None, re.escape( - "Unrecognized term of TPCH.suppliers.TPCH.parts: 'suppliers'. Did you mean: lines, supply_records, container, size, comment, key, name?" + "Unrecognized term of TPCH.suppliers.TPCH.parts: 'suppliers'. Did you mean: size, lines, key, name, supply_records?" ), id="bad_cross_6", ), - # NOTE: raised exception with an empty message + # TODO: fix the error handling here to give a proper error message + # (currently fails in hybrid due to an assertion) pytest.param( bad_cross_7, None, @@ -3249,7 +3250,7 @@ def test_pipeline_e2e_tpch_custom( bad_cross_8, None, re.escape( - "Unrecognized term of TPCH.regions.CALCULATE(r1=name).TPCH.nations: 'r_key'. Did you mean: key, name, r1?" + "Unrecognized term of TPCH.regions.CALCULATE(r1=name).TPCH.nations: 'r_key'. Did you mean: key, r1, name?" 
), id="bad_cross_8", ), @@ -3268,7 +3269,7 @@ def test_pipeline_e2e_tpch_custom( pytest.param( bad_cross_11, None, - "Unrecognized term of TPCH.nations.TPCH.regions: 'customers'. Did you mean: comment, name, nations, key?", + "Unrecognized term of TPCH.nations.TPCH.regions: 'customers'. Did you mean: comment, name, key, nations?", id="bad_cross_11", ), pytest.param( diff --git a/tests/test_plan_refsols/simple_var_std.txt b/tests/test_plan_refsols/simple_var_std.txt index af3b1bbe0..5af72a1ec 100644 --- a/tests/test_plan_refsols/simple_var_std.txt +++ b/tests/test_plan_refsols/simple_var_std.txt @@ -2,5 +2,5 @@ ROOT(columns=[('name', n_name), ('var', pop_var), ('std', pop_std), ('sample_var JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'pop_std': t1.pop_std, 'pop_var': t1.pop_var, 'sample_std': t1.sample_std, 'sample_var': t1.sample_var}) FILTER(condition=ISIN(n_name, ['ALGERIA', 'ARGENTINA']:array[unknown]), columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'pop_std': POPULATION_STD(s_acctbal), 'pop_var': POPULATION_VARIANCE(s_acctbal), 'sample_std': SAMPLE_STD(s_acctbal), 'sample_var': SAMPLE_VARIANCE(s_acctbal)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'pop_std': POPULATION_STD(s_acctbal), 'pop_var': POPULATION_VAR(s_acctbal), 'sample_std': SAMPLE_STD(s_acctbal), 'sample_var': SAMPLE_VAR(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/simple_var_std_with_nulls.txt b/tests/test_plan_refsols/simple_var_std_with_nulls.txt index 85f3089c8..a29f26c60 100644 --- a/tests/test_plan_refsols/simple_var_std_with_nulls.txt +++ b/tests/test_plan_refsols/simple_var_std_with_nulls.txt @@ -1,5 +1,5 @@ ROOT(columns=[('var_samp_0_nnull', 
var_samp_0_nnull), ('var_samp_1_nnull', var_samp_1_nnull), ('var_samp_2_nnull', var_samp_2_nnull), ('var_pop_0_nnull', var_pop_0_nnull), ('var_pop_1_nnull', var_pop_1_nnull), ('var_pop_2_nnull', var_pop_2_nnull), ('std_samp_0_nnull', std_samp_0_nnull), ('std_samp_1_nnull', std_samp_1_nnull), ('std_samp_2_nnull', std_samp_2_nnull), ('std_pop_0_nnull', std_pop_0_nnull), ('std_pop_1_nnull', std_pop_1_nnull), ('std_pop_2_nnull', std_pop_2_nnull)], orderings=[]) - AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(key_0), 'std_pop_1_nnull': POPULATION_STD(key_1), 'std_pop_2_nnull': POPULATION_STD(key_2), 'std_samp_0_nnull': SAMPLE_STD(key_0), 'std_samp_1_nnull': SAMPLE_STD(key_1), 'std_samp_2_nnull': SAMPLE_STD(key_2), 'var_pop_0_nnull': POPULATION_VARIANCE(key_0), 'var_pop_1_nnull': POPULATION_VARIANCE(key_1), 'var_pop_2_nnull': POPULATION_VARIANCE(key_2), 'var_samp_0_nnull': SAMPLE_VARIANCE(key_0), 'var_samp_1_nnull': SAMPLE_VARIANCE(key_1), 'var_samp_2_nnull': SAMPLE_VARIANCE(key_2)}) + AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(key_0), 'std_pop_1_nnull': POPULATION_STD(key_1), 'std_pop_2_nnull': POPULATION_STD(key_2), 'std_samp_0_nnull': SAMPLE_STD(key_0), 'std_samp_1_nnull': SAMPLE_STD(key_1), 'std_samp_2_nnull': SAMPLE_STD(key_2), 'var_pop_0_nnull': POPULATION_VAR(key_0), 'var_pop_1_nnull': POPULATION_VAR(key_1), 'var_pop_2_nnull': POPULATION_VAR(key_2), 'var_samp_0_nnull': SAMPLE_VAR(key_0), 'var_samp_1_nnull': SAMPLE_VAR(key_1), 'var_samp_2_nnull': SAMPLE_VAR(key_2)}) PROJECT(columns={'key_0': KEEP_IF(c_acctbal, c_custkey > 3:numeric), 'key_1': KEEP_IF(c_acctbal, c_custkey > 2:numeric), 'key_2': KEEP_IF(c_acctbal, c_custkey > 1:numeric)}) FILTER(condition=ISIN(c_custkey, [1, 2, 3]:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_pydough_functions/all_pydough_functions_dialects.py 
b/tests/test_pydough_functions/all_pydough_functions_dialects.py index b5fd4dec7..06419eff3 100644 --- a/tests/test_pydough_functions/all_pydough_functions_dialects.py +++ b/tests/test_pydough_functions/all_pydough_functions_dialects.py @@ -186,8 +186,10 @@ def aggregation_functions(): anything_value=ANYTHING(customers.account_balance), count_value=COUNT(customers.account_balance), count_distinct_value=NDISTINCT(customers.account_balance), - variance_value=VAR(customers.account_balance, type="sample"), - stddev_value=STD(customers.account_balance, type="sample"), + variance_s_value=VAR(customers.account_balance, type="sample"), + variance_p_value=VAR(customers.account_balance, type="population"), + stddev_s_value=STD(customers.account_balance, type="sample"), + stddev_p_value=STD(customers.account_balance, type="population"), ).WHERE(HAS(customers) & HASNOT(customers.orders)) diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index de266738c..da16a0086 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -126,8 +126,8 @@ id="kwargfunc_1", ), pytest.param( - "result = nations.CALCULATE(name=name, var=SAMPLE_VARIANCE(suppliers.account_balance))", - "PyDough object SAMPLE_VARIANCE is not callable. Did you mean: SLICE, REPLACE, MEDIAN, SIGN, STRING, YEAR, ISIN, MIN, STRIP, SUM, UPPER, VAR?", + "result = nations.CALCULATE(name=name, var=SAMPLE_VAR(suppliers.account_balance))", + "PyDough object SAMPLE_VAR is not callable. 
Did you mean: YEAR, SUM, UPPER, VAR, AVG, LPAD, PREV, RPAD, DAY, FLOAT, FLOOR, HAS, LOWER, MAX, POWER, SLICE, SMALLEST, SQRT, STD, ABS, CEIL, GETPART, HOUR, LIKE, MEDIAN, NEXT, QUARTER, RELAVG, REPLACE, SECOND, SIGN, STRIP?", id="kwargfunc_2", ), pytest.param( diff --git a/tests/test_sql_refsols/aggregation_functions_ansi.sql b/tests/test_sql_refsols/aggregation_functions_ansi.sql index dd83c4df2..96a109798 100644 --- a/tests/test_sql_refsols/aggregation_functions_ansi.sql +++ b/tests/test_sql_refsols/aggregation_functions_ansi.sql @@ -16,8 +16,10 @@ WITH _s1 AS ( MEDIAN(customer.c_acctbal) AS median_c_acctbal, MIN(customer.c_acctbal) AS min_c_acctbal, COUNT(DISTINCT customer.c_acctbal) AS ndistinct_c_acctbal, + STDDEV_POP(customer.c_acctbal) AS population_std_c_acctbal, + VARIANCE_POP(customer.c_acctbal) AS population_var_c_acctbal, STDDEV(customer.c_acctbal) AS sample_std_c_acctbal, - VARIANCE(customer.c_acctbal) AS sample_variance_c_acctbal, + VARIANCE(customer.c_acctbal) AS sample_var_c_acctbal, SUM(customer.c_acctbal) AS sum_c_acctbal, SUM(_s1.n_rows) AS sum_n_rows, customer.c_nationkey @@ -37,8 +39,10 @@ SELECT _t1.anything_c_acctbal AS anything_value, _t1.count_c_acctbal AS count_value, _t1.ndistinct_c_acctbal AS count_distinct_value, - _t1.sample_variance_c_acctbal AS variance_value, - _t1.sample_std_c_acctbal AS stddev_value + _t1.sample_var_c_acctbal AS variance_s_value, + _t1.population_var_c_acctbal AS variance_p_value, + _t1.sample_std_c_acctbal AS stddev_s_value, + _t1.population_std_c_acctbal AS stddev_p_value FROM tpch.nation AS nation JOIN _t1 AS _t1 ON _t1.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/aggregation_functions_sqlite.sql b/tests/test_sql_refsols/aggregation_functions_sqlite.sql index c8aaa7986..c2f829e61 100644 --- a/tests/test_sql_refsols/aggregation_functions_sqlite.sql +++ b/tests/test_sql_refsols/aggregation_functions_sqlite.sql @@ -19,12 +19,12 @@ WITH _s1 AS ( ) < 1.0 THEN customer.c_acctbal ELSE NULL - 
END AS expr_15, + END AS expr_17, CASE WHEN CAST(0.19999999999999996 * COUNT(customer.c_acctbal) OVER (PARTITION BY customer.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal DESC) THEN customer.c_acctbal ELSE NULL - END AS expr_16, + END AS expr_18, customer.c_acctbal, customer.c_nationkey, _s1.n_rows @@ -35,12 +35,35 @@ WITH _s1 AS ( SELECT MAX(c_acctbal) AS anything_c_acctbal, AVG(c_acctbal) AS avg_c_acctbal, - AVG(expr_15) AS avg_expr_15, + AVG(expr_17) AS avg_expr_17, COUNT(c_acctbal) AS count_c_acctbal, MAX(c_acctbal) AS max_c_acctbal, - MAX(expr_16) AS max_expr_16, + MAX(expr_18) AS max_expr_18, MIN(c_acctbal) AS min_c_acctbal, COUNT(DISTINCT c_acctbal) AS ndistinct_c_acctbal, + POWER( + ( + CAST(( + SUM(( + POWER(c_acctbal, 2) + )) - ( + CAST(( + POWER(SUM(c_acctbal), 2) + ) AS REAL) / COUNT(c_acctbal) + ) + ) AS REAL) / COUNT(c_acctbal) + ), + 0.5 + ) AS population_std_c_acctbal, + CAST(( + SUM(( + POWER(c_acctbal, 2) + )) - ( + CAST(( + POWER(SUM(c_acctbal), 2) + ) AS REAL) / COUNT(c_acctbal) + ) + ) AS REAL) / COUNT(c_acctbal) AS population_var_c_acctbal, POWER( ( CAST(( @@ -67,7 +90,7 @@ WITH _s1 AS ( ) ) AS REAL) / ( COUNT(c_acctbal) - 1 - ) AS sample_variance_c_acctbal, + ) AS sample_var_c_acctbal, SUM(c_acctbal) AS sum_c_acctbal, SUM(n_rows) AS sum_n_rows, c_nationkey @@ -78,15 +101,17 @@ WITH _s1 AS ( SELECT COALESCE(_t1.sum_c_acctbal, 0) AS sum_value, _t1.avg_c_acctbal AS avg_value, - _t1.avg_expr_15 AS median_value, + _t1.avg_expr_17 AS median_value, _t1.min_c_acctbal AS min_value, _t1.max_c_acctbal AS max_value, - _t1.max_expr_16 AS quantile_value, + _t1.max_expr_18 AS quantile_value, _t1.anything_c_acctbal AS anything_value, _t1.count_c_acctbal AS count_value, _t1.ndistinct_c_acctbal AS count_distinct_value, - _t1.sample_variance_c_acctbal AS variance_value, - _t1.sample_std_c_acctbal AS stddev_value + _t1.sample_var_c_acctbal AS variance_s_value, + _t1.population_var_c_acctbal AS 
variance_p_value, + _t1.sample_std_c_acctbal AS stddev_s_value, + _t1.population_std_c_acctbal AS stddev_p_value FROM tpch.nation AS nation JOIN _t1 AS _t1 ON _t1.c_nationkey = nation.n_nationkey From b94e76dc171281c3070590446908d799973fd157 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 14:30:23 -0400 Subject: [PATCH 056/143] WIP --- tests/test_pipeline_tpch_udf.py | 31 ++++++++++--------- .../bad_pydough_functions.py | 20 ------------ tests/testing_utilities.py | 6 ++-- 3 files changed, 21 insertions(+), 36 deletions(-) diff --git a/tests/test_pipeline_tpch_udf.py b/tests/test_pipeline_tpch_udf.py index 42ec6c57d..002363cbf 100644 --- a/tests/test_pipeline_tpch_udf.py +++ b/tests/test_pipeline_tpch_udf.py @@ -11,13 +11,6 @@ from pydough.database_connectors import DatabaseContext, DatabaseDialect from pydough.metadata import GraphMetadata -from pydough.unqualified import UnqualifiedNode -from tests.test_pydough_functions.bad_pydough_functions import ( - bad_sqlite_udf_1, - bad_sqlite_udf_2, - bad_sqlite_udf_3, - bad_sqlite_udf_4, -) from tests.test_pydough_functions.udf_pydough_functions import ( sqlite_udf_combine_strings, sqlite_udf_count_epsilon, @@ -412,32 +405,42 @@ def test_pipeline_e2e_tpch_sqlite_udf( @pytest.mark.parametrize( - "pydough_impl, error_message", + "pydough_text, error_message", [ pytest.param( - bad_sqlite_udf_1, + # Calling a UDF that requires 2 arguments with only 1 argument + "result = orders.CALCULATE(x=FORMAT_DATETIME('%Y'))", "Invalid operator invocation \"FORMAT_DATETIME('%Y')\": Expected 2 arguments, received 1", id="bad_sqlite_udf_1", ), pytest.param( - bad_sqlite_udf_2, + # Calling a UDF that requires 2 arguments with 3 arguments + "result = orders.CALCULATE(x=FORMAT_DATETIME('%Y' order_date, 'foo'))", "Invalid operator invocation \"FORMAT_DATETIME('%Y', order_date, 'foo')\": Expected 2 arguments, received 3", id="bad_sqlite_udf_2", ), pytest.param( - bad_sqlite_udf_3, + # Calling a UDF that requires 1-2 
arguments with 0 arguments + "result = nations.CALCULATE(x=GCAT(by=name.ASC()))", "Invalid operator invocation 'GCAT()': Expected between 1 and 2 arguments inclusive, received 0", id="bad_sqlite_udf_3", ), pytest.param( - bad_sqlite_udf_4, + # Calling a UDF that requires 1-2 arguments with 3 arguments + "result = nations.CALCULATE(x=GCAT(name, ';', 'bar', by=name.ASC()))", "Invalid operator invocation \"GCAT(name, ';', 'bar')\": Expected between 1 and 2 arguments inclusive, received 3.", id="bad_sqlite_udf_4", ), + pytest.param( + # Calling a UDF function that doesn't exist + "result = order.CALCULATE(x=fmtdate('%Y', order_date))", + "Invalid operator invocation \"GCAT(name, ';', 'bar')\": Expected between 1 and 2 arguments inclusive, received 3.", + id="bad_sqlite_udf_5", + ), ], ) def test_pipeline_tpch_sqlite_udf_errors( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_text: str, error_message: str, get_udf_graph: graph_fetcher, ): @@ -446,7 +449,7 @@ def test_pipeline_tpch_sqlite_udf_errors( """ graph: GraphMetadata = get_udf_graph("TPCH_SQLITE_UDFS") run_e2e_error_test( - pydough_impl, + pydough_text, re.escape(error_message), graph, ) diff --git a/tests/test_pydough_functions/bad_pydough_functions.py b/tests/test_pydough_functions/bad_pydough_functions.py index fe6cb7f65..f019fb8c2 100644 --- a/tests/test_pydough_functions/bad_pydough_functions.py +++ b/tests/test_pydough_functions/bad_pydough_functions.py @@ -483,26 +483,6 @@ def bad_name_25(): ) -def bad_sqlite_udf_1(): - # Calling a UDF that requires 2 arguments with only 1 argument - return orders.CALCULATE(x=FORMAT_DATETIME("%Y")) - - -def bad_sqlite_udf_2(): - # Calling a UDF that requires 2 arguments with 3 arguments - return orders.CALCULATE(x=FORMAT_DATETIME("%Y", order_date, "foo")) - - -def bad_sqlite_udf_3(): - # Calling a UDF that requires 1-2 arguments with 0 arguments - return nations.CALCULATE(x=GCAT(by=name.ASC())) - - -def bad_sqlite_udf_4(): - # Calling a UDF that requires 1-2 
arguments with 3 arguments - return nations.CALCULATE(x=GCAT(name, ";", "bar", by=name.ASC())) - - # TEST for CROSS def bad_cross_1(): # Reason it is bad: Using `CROSS` with a not a collection diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index ac2274bcd..fd78c30db 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1226,7 +1226,7 @@ def run_e2e_test( def run_e2e_error_test( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[[], UnqualifiedNode] | str, error_message: str, graph: GraphMetadata, columns: dict[str, str] | list[str] | None = None, @@ -1239,7 +1239,8 @@ def run_e2e_error_test( provided `error_message`. Args: - `pydough_impl`: The PyDough function to be tested. + `pydough_impl`: The PyDough function to be tested, or the string that + should be evaluated to obtain the PyDough code. `error_message`: The error message that is expected to be raised. `graph`: The metadata graph to use for the test. `columns`: The columns argument to use for the test, if any. @@ -1247,6 +1248,7 @@ def run_e2e_error_test( `config`: The PyDough configuration to use for the test, if any. 
""" with pytest.raises(Exception, match=error_message): + assert not isinstance(pydough_impl, str) root: UnqualifiedNode = transform_and_exec_pydough(pydough_impl, graph) call_kwargs: dict = {} if graph is not None: From 19a0fb84d750fb5843c70984d2bb1d6b73c26576 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 14:35:53 -0400 Subject: [PATCH 057/143] Resolving conflicts and fixing UDF tests --- tests/test_pipeline_tpch_udf.py | 6 +++--- tests/test_plan_refsols/simple_var_std_with_nulls.txt | 9 +-------- tests/test_sql_refsols/defog_broker_adv15_ansi.sql | 4 +++- tests/test_sql_refsols/defog_broker_adv15_sqlite.sql | 4 +++- tests/testing_utilities.py | 3 --- 5 files changed, 10 insertions(+), 16 deletions(-) diff --git a/tests/test_pipeline_tpch_udf.py b/tests/test_pipeline_tpch_udf.py index 002363cbf..cb58bf775 100644 --- a/tests/test_pipeline_tpch_udf.py +++ b/tests/test_pipeline_tpch_udf.py @@ -415,7 +415,7 @@ def test_pipeline_e2e_tpch_sqlite_udf( ), pytest.param( # Calling a UDF that requires 2 arguments with 3 arguments - "result = orders.CALCULATE(x=FORMAT_DATETIME('%Y' order_date, 'foo'))", + "result = orders.CALCULATE(x=FORMAT_DATETIME('%Y', order_date, 'foo'))", "Invalid operator invocation \"FORMAT_DATETIME('%Y', order_date, 'foo')\": Expected 2 arguments, received 3", id="bad_sqlite_udf_2", ), @@ -433,8 +433,8 @@ def test_pipeline_e2e_tpch_sqlite_udf( ), pytest.param( # Calling a UDF function that doesn't exist - "result = order.CALCULATE(x=fmtdate('%Y', order_date))", - "Invalid operator invocation \"GCAT(name, ';', 'bar')\": Expected between 1 and 2 arguments inclusive, received 3.", + "result = order.CALCULATE(x=FORMATDATETIME('%Y', order_date))", + "PyDough object FORMATDATETIME is not callable. 
Did you mean: FORMAT_DATETIME, DATETIME, FLOAT?", id="bad_sqlite_udf_5", ), ], diff --git a/tests/test_plan_refsols/simple_var_std_with_nulls.txt b/tests/test_plan_refsols/simple_var_std_with_nulls.txt index 811417dab..620efe547 100644 --- a/tests/test_plan_refsols/simple_var_std_with_nulls.txt +++ b/tests/test_plan_refsols/simple_var_std_with_nulls.txt @@ -1,11 +1,4 @@ ROOT(columns=[('var_samp_0_nnull', var_samp_0_nnull), ('var_samp_1_nnull', var_samp_1_nnull), ('var_samp_2_nnull', var_samp_2_nnull), ('var_pop_0_nnull', var_pop_0_nnull), ('var_pop_1_nnull', var_pop_1_nnull), ('var_pop_2_nnull', var_pop_2_nnull), ('std_samp_0_nnull', std_samp_0_nnull), ('std_samp_1_nnull', std_samp_1_nnull), ('std_samp_2_nnull', std_samp_2_nnull), ('std_pop_0_nnull', std_pop_0_nnull), ('std_pop_1_nnull', std_pop_1_nnull), ('std_pop_2_nnull', std_pop_2_nnull)], orderings=[]) -<<<<<<< HEAD - AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(key_0), 'std_pop_1_nnull': POPULATION_STD(key_1), 'std_pop_2_nnull': POPULATION_STD(key_2), 'std_samp_0_nnull': SAMPLE_STD(key_0), 'std_samp_1_nnull': SAMPLE_STD(key_1), 'std_samp_2_nnull': SAMPLE_STD(key_2), 'var_pop_0_nnull': POPULATION_VAR(key_0), 'var_pop_1_nnull': POPULATION_VAR(key_1), 'var_pop_2_nnull': POPULATION_VAR(key_2), 'var_samp_0_nnull': SAMPLE_VAR(key_0), 'var_samp_1_nnull': SAMPLE_VAR(key_1), 'var_samp_2_nnull': SAMPLE_VAR(key_2)}) - PROJECT(columns={'key_0': KEEP_IF(c_acctbal, c_custkey > 3:numeric), 'key_1': KEEP_IF(c_acctbal, c_custkey > 2:numeric), 'key_2': KEEP_IF(c_acctbal, c_custkey > 1:numeric)}) - FILTER(condition=ISIN(c_custkey, [1, 2, 3]:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) -======= - AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_pop_1_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 
'std_pop_2_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'std_samp_0_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_samp_1_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'std_samp_2_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_pop_0_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_pop_1_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_pop_2_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_samp_0_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_samp_1_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_samp_2_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 1:numeric))}) + AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_pop_1_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'std_pop_2_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'std_samp_0_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_samp_1_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'std_samp_2_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_pop_0_nnull': POPULATION_VAR(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_pop_1_nnull': POPULATION_VAR(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_pop_2_nnull': POPULATION_VAR(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_samp_0_nnull': SAMPLE_VAR(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_samp_1_nnull': SAMPLE_VAR(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_samp_2_nnull': SAMPLE_VAR(KEEP_IF(c_acctbal, c_custkey > 1:numeric))}) FILTER(condition=ISIN(c_custkey, [1, 2, 3]:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) ->>>>>>> kian/pagerank diff --git 
a/tests/test_sql_refsols/defog_broker_adv15_ansi.sql b/tests/test_sql_refsols/defog_broker_adv15_ansi.sql index 7ada2b05b..865bf4ed4 100644 --- a/tests/test_sql_refsols/defog_broker_adv15_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv15_ansi.sql @@ -1,6 +1,8 @@ SELECT sbcustcountry AS country, - 100 * COALESCE(COALESCE(SUM(sbcuststatus = 'active'), 0) / COUNT(*), 0.0) AS ar + 100 * ( + COALESCE(SUM(sbcuststatus = 'active'), 0) / COUNT(*) + ) AS ar FROM main.sbcustomer WHERE sbcustjoindate <= '2022-12-31' AND sbcustjoindate >= '2022-01-01' diff --git a/tests/test_sql_refsols/defog_broker_adv15_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv15_sqlite.sql index e2663ce7b..27a090ce1 100644 --- a/tests/test_sql_refsols/defog_broker_adv15_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv15_sqlite.sql @@ -1,6 +1,8 @@ SELECT sbcustcountry AS country, - 100 * COALESCE(CAST(COALESCE(SUM(sbcuststatus = 'active'), 0) AS REAL) / COUNT(*), 0.0) AS ar + 100 * ( + CAST(COALESCE(SUM(sbcuststatus = 'active'), 0) AS REAL) / COUNT(*) + ) AS ar FROM main.sbcustomer WHERE sbcustjoindate <= '2022-12-31' AND sbcustjoindate >= '2022-01-01' diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index 887d386e3..321286516 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1297,9 +1297,6 @@ def run_e2e_error_test( `config`: The PyDough configuration to use for the test, if any. 
""" with pytest.raises(Exception, match=error_message): - assert not isinstance(pydough_impl, str), ( - "Expected pydough_impl to be a callable, not a string" - ) root: UnqualifiedNode = transform_and_exec_pydough(pydough_impl, graph, None) call_kwargs: dict = {} if graph is not None: From bf69fe8f2a072b1b27d57f614deaa1449e8fbed0 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 15:07:13 -0400 Subject: [PATCH 058/143] Moved window errors --- pydough/errors/pydough_error_builder.py | 54 ++++++++++++- pydough/unqualified/qualification.py | 101 +++++++++++++----------- tests/test_qualification_errors.py | 19 +++-- 3 files changed, 121 insertions(+), 53 deletions(-) diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py index 9582f0295..12ec8bb5b 100644 --- a/pydough/errors/pydough_error_builder.py +++ b/pydough/errors/pydough_error_builder.py @@ -16,7 +16,7 @@ from pydough.pydough_operators import PyDoughOperator from pydough.qdag import PyDoughCollectionQDAG, PyDoughExpressionQDAG from pydough.relational import CallExpression - from pydough.unqualified import UnqualifiedNode + from pydough.unqualified import UnqualifiedNode, UnqualifiedWindow class PyDoughErrorBuilder: @@ -290,3 +290,55 @@ def undefined_function_call( else: error_message += " Did you mean to use a function?" return PyDoughUnqualifiedException(error_message) + + def bad_window_per( + self, + per: str, + ancestral_names: list[str], + context: "PyDoughCollectionQDAG", + window: "UnqualifiedWindow", + ) -> PyDoughException: + """ + Creates an exception for when the `per` string in a window is malformed. + + Args: + `per`: The per string that caused the error. + `ancestral_names`: The names of the ancestors in the context. + `context`: The collection context where the error occurred. + `window`: The unqualified window that contains the per string. + + Returns: + An exception indicating the malformed per string. 
+ """ + ancestor_name: str + ancestor_idx: int | None = None + msg: str | None = None + components: list[str] = per.split(":") + + # Extract the name/idx components of `per=name:idx`, identifying an + # error if not in that format. + if len(components) <= 2: + if len(components) == 1: + ancestor_name = components[0] + ancestor_idx = None + elif len(components) == 2: + ancestor_name = components[0] + if not components[1].isdigit() or int(components[1]) <= 0: + msg = "expected the index after ':' to be a positive integer" + else: + ancestor_idx = int(components[1]) + # If an error was not found yet, figure out what is wrong with + # `name` or `idx`. + if msg is None: + if ancestor_name not in ancestral_names: + msg = f"unrecognized ancestor {ancestor_name!r}" + elif ancestor_idx is None and ancestral_names.count(ancestor_name) > 1: + msg = f"per-string {ancestor_name!r} is ambiguous in this context; use the form '{ancestor_name}:index' to disambiguate, where '{ancestor_name}:1' refers to the most recent ancestor" + else: + msg = f"there are not {ancestor_idx} ancestors of the current context with name {ancestor_name!r}" + else: + msg = f"expected 0 or 1 ':', found {len(components) - 1})" + + return PyDoughUnqualifiedException( + f"Error while parsing 'per' string of {window} in context {context} ({msg})" + ) diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index 82783d49f..57b125e32 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -208,6 +208,59 @@ def qualify_binary_operation( operator, [qualified_lhs, qualified_rhs] ) + def extract_window_per_args( + self, + per: str, + ancestral_names: list[str], + context: PyDoughCollectionQDAG, + window: UnqualifiedWindow, + ) -> tuple[str, int | None]: + ancestor_name: str + ancestor_idx: int | None + # Break down the per string into its components, which is either + # `[name]`, or `[name, index]`, where `index` must be a positive + # 
integer. + components: list[str] = per.split(":") + if len(components) == 1: + ancestor_name = components[0] + ancestor_idx = None + elif len(components) == 2: + ancestor_name = components[0] + if not components[1].isdigit(): + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + ancestor_idx = int(components[1]) + if ancestor_idx <= 0: + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + else: + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + # Verify that `name` corresponds to one of the ancestors of the + # current context. + if ancestor_name not in ancestral_names: + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + # Verify that `name` is only present exactly one time in the + # ancestors of the current context, unless an index was provided. + if ancestor_idx is None: + if ancestral_names.count(ancestor_name) > 1: + # TODO: potentially add a default value of 1? + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + elif ancestral_names.count(ancestor_name) < ancestor_idx: + # If an index was provided, ensure that there are that many + # ancestors with that name. + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + return ancestor_name, ancestor_idx + def qualify_window( self, unqualified: UnqualifiedWindow, @@ -259,51 +312,9 @@ def qualify_window( # the number of ancestor levels to go up to). if per is not None: ancestral_names: list[str] = context.get_ancestral_names() - ancestor_name: str - ancestor_idx: int | None - # Break down the per string into its components, which is either - # `[name]`, or `[name, index]`, where `index` must be a positive - # integer. 
- components: list[str] = per.split(":") - if len(components) == 1: - ancestor_name = components[0] - ancestor_idx = None - elif len(components) == 2: - ancestor_name = components[0] - if not components[1].isdigit(): - raise PyDoughUnqualifiedException( - f"Malformed per string: {per!r} (expected the index after ':' to be a positive integer)" - ) - ancestor_idx = int(components[1]) - if ancestor_idx <= 0: - raise PyDoughUnqualifiedException( - f"Malformed per string: {per!r} (expected the index after ':' to be a positive integer)" - ) - else: - raise PyDoughUnqualifiedException( - f"Malformed per string: {per!r} (expected 0 or 1 ':', found {len(components) - 1})" - ) - # Verify that `name` corresponds to one of the ancestors of the - # current context. - if ancestor_name not in ancestral_names: - raise PyDoughUnqualifiedException( - f"Per string refers to unrecognized ancestor {ancestor_name!r} of {context!r}" - ) - # Verify that `name` is only present exactly one time in the - # ancestors of the current context, unless an index was provided. - if ancestor_idx is None: - if ancestral_names.count(ancestor_name) > 1: - # TODO: potentially add a default value of 1? - raise PyDoughUnqualifiedException( - f"Per string {per!r} is ambiguous for {context!r}. Use the form '{per}:index' to disambiguate, where '{per}:1' refers to the most recent ancestor." - ) - elif ancestral_names.count(ancestor_name) < ancestor_idx: - # If an index was provided, ensure that there are that many - # ancestors with that name. - raise PyDoughUnqualifiedException( - f"Per string {per!r} invalid as there are not {ancestor_idx} ancestors of the current context with name {ancestor_name!r}." - ) - + ancestor_name, ancestor_idx = self.extract_window_per_args( + per, ancestral_names, context, unqualified + ) # Find how many levels upward need to be traversed to find the # targeted ancestor by finding the nth ancestor matching the # name, at the end of the ancestral_names. 
diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index da16a0086..6f1726c66 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -87,39 +87,44 @@ ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='custs'))", - "Per string refers to unrecognized ancestor 'custs' of TPCH.customers.orders", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='custs') in context TPCH.customers.orders (unrecognized ancestor 'custs')", id="bad_per_1", ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:2'))", - "Per string 'customers:2' invalid as there are not 2 ancestors of the current context with name 'customers'.", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='customers:2') in context TPCH.customers.orders (there are not 2 ancestors of the current context with name 'customers')", id="bad_per_2", ), pytest.param( "result = customers.orders.customer.orders.lines.CALCULATE(RANKING(by=extended_price.DESC(), per='orders'))", - "Per string 'orders' is ambiguous for TPCH.customers.orders.customer.orders.lines. 
Use the form 'orders:index' to disambiguate, where 'orders:1' refers to the most recent ancestor.", + "Error while parsing 'per' string of RANKING(by=(extended_price.DESC(na_pos='last'), per='orders') in context TPCH.customers.orders.customer.orders.lines (per-string 'orders' is ambiguous in this context; use the form 'orders:index' to disambiguate, where 'orders:1' refers to the most recent ancestor)", id="bad_per_3", ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:k'))", - "Malformed per string: 'customers:k' (expected the index after ':' to be a positive integer)", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='customers:k') in context TPCH.customers.orders (expected the index after ':' to be a positive integer)", id="bad_per_4", ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:1:2'))", - "Malformed per string: 'customers:1:2' (expected 0 or 1 ':', found 2)", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='customers:1:2') in context TPCH.customers.orders (expected 0 or 1 ':', found 2))", id="bad_per_5", ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:'))", - "Malformed per string: 'customers:' (expected the index after ':' to be a positive integer)", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='customers:') in context TPCH.customers.orders (expected the index after ':' to be a positive integer)", id="bad_per_6", ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:0'))", - "Malformed per string: 'customers:0' (expected the index after ':' to be a positive integer)", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='customers:0') in context TPCH.customers.orders (expected the index after ':' to be a positive integer)", id="bad_per_7", ), + pytest.param( + "result = 
customers.orders.CALCULATE(RANKING(by=key.ASC(), per=-1))", + "`per` argument must be a string", + id="bad_per_8", + ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_VAR(suppliers.account_balance))", "PyDough object SAMPLE_VAR is not callable. Did you mean: YEAR, SUM, UPPER, VAR, AVG, LPAD, PREV, RPAD, DAY, FLOAT, FLOOR, HAS, LOWER, MAX, POWER, SLICE, SMALLEST, SQRT, STD, ABS, CEIL, GETPART, HOUR, LIKE, MEDIAN, NEXT, QUARTER, RELAVG, REPLACE, SECOND, SIGN, STRIP?", From c5fdfef1fb23ff5092ef362ca0df4b2c641dfdb9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 17:07:37 -0400 Subject: [PATCH 059/143] Updating helper [RUN CI] --- pydough/conversion/projection_pullup.py | 60 +++---------------- .../agg_simplification_1.txt | 5 +- tests/test_plan_refsols/common_prefix_al.txt | 23 ++++--- tests/test_plan_refsols/common_prefix_am.txt | 21 ++++--- tests/test_plan_refsols/common_prefix_n.txt | 49 ++++++++------- tests/test_plan_refsols/common_prefix_o.txt | 53 ++++++++-------- tests/test_plan_refsols/common_prefix_s.txt | 31 +++++----- tests/test_plan_refsols/correl_24.txt | 13 ++-- .../month_year_sliding_windows.txt | 4 +- .../mostly_positive_accounts_per_nation3.txt | 21 ++++--- .../multi_partition_access_5.txt | 34 +++++------ tests/test_plan_refsols/part_reduced_size.txt | 9 ++- tests/test_plan_refsols/simple_cross_5.txt | 4 +- tests/test_plan_refsols/singular7.txt | 4 +- .../test_plan_refsols/supplier_best_part.txt | 21 ++++--- .../technograph_monthly_incident_rate.txt | 11 ++-- ..._year_cumulative_incident_rate_overall.txt | 25 ++++---- tests/test_plan_refsols/tpch_q11.txt | 35 ++++++----- tests/test_plan_refsols/tpch_q18.txt | 17 +++--- tests/test_plan_refsols/tpch_q22.txt | 21 ++++--- .../defog_dealership_gen4_ansi.sql | 4 +- .../defog_dealership_gen4_sqlite.sql | 4 +- ...technograph_monthly_incident_rate_ansi.sql | 46 +++++++------- ...chnograph_monthly_incident_rate_sqlite.sql | 46 +++++++------- 
..._cumulative_incident_rate_overall_ansi.sql | 4 +- ...umulative_incident_rate_overall_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q11_ansi.sql | 2 +- tests/test_sql_refsols/tpch_q11_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q18_ansi.sql | 12 ++-- tests/test_sql_refsols/tpch_q18_sqlite.sql | 12 ++-- 30 files changed, 267 insertions(+), 330 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 356acca41..a2c891a1b 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -29,9 +29,6 @@ contains_window, transpose_expression, ) -from pydough.relational.relational_expressions.column_reference_finder import ( - ColumnReferenceFinder, -) from pydough.types import BooleanType, NumericType from .merge_projects import merge_adjacent_projects @@ -153,8 +150,6 @@ def pull_non_columns(node: Join | Filter | Limit) -> RelationalNode: def pull_project_helper( - output_columns: dict[str, RelationalExpression], - used_columns: set[RelationalExpression], project: Project, input_name: str | None, ) -> dict[RelationalExpression, RelationalExpression]: @@ -166,11 +161,6 @@ def pull_project_helper( to substitute the columns in the parent node's output columns or conditions. Args: - `output_columns`: The columns of the parent node that the expressions - from the project node can be pulled into. - `used_columns`: The set of expressions indicating invocations of the - columns from the project in the parent node, e.g. as a filter - or join condition, limit ordering, or aggregation key. `project`: The Project node to pull columns from. `input_name`: The name of the input to the parent node that the project node is connected to. This is used to add input names to the @@ -180,8 +170,7 @@ def pull_project_helper( A mapping of expressions that can be used to substitute the columns in the parent node's output columns or conditions. 
This mapping will ensure columns are only pulled up if they do not contain window - functions, and they are not simultaneously used in the parent's output - while also being used in the condition or orderings. + functions. """ # Ensure every column in the project's inputs is also present in the output # columns of the project. This will ensure that any function calls that are @@ -191,36 +180,15 @@ def pull_project_helper( widen_columns(project) ) - # Identify which columns from the project node are used in the condition - # or orderings, versus those used in the output columns of the parent. - finder: ColumnReferenceFinder = ColumnReferenceFinder() - - # First, the columns used in the output columns of the parent. - finder.reset() - for expr in output_columns.values(): - expr.accept(finder) - output_cols: set[ColumnReference] = finder.get_column_references() - output_names: set[str] = {col.name for col in output_cols} - - # Next the columns that are utilized by the node. - finder.reset() - for expr in used_columns: - expr.accept(finder) - used_cols: set[ColumnReference] = finder.get_column_references() - used_names: set[str] = {col.name for col in used_cols} - # Iterate through the columns of the project to see which ones can be - # pulled up into the parent's output columns vs condition/orderings, - # adding them to a substitutions mapping that will be used to apply the - # transformations. + # pulled up into the parent, adding them to a substitutions mapping that + # will be used to apply the transformations.
substitutions: dict[RelationalExpression, RelationalExpression] = {} for name, expr in project.columns.items(): new_expr: RelationalExpression = add_input_name( apply_substitution(expr, transfer_substitutions, {}), input_name ) - if (not contains_window(new_expr)) and ( - (name in used_names) != (name in output_names) - ): + if not contains_window(new_expr): ref_expr: ColumnReference = ColumnReference( name, expr.data_type, input_name=input_name ) @@ -251,12 +219,7 @@ def pull_project_into_join(node: Join, input_index: int) -> None: # columns or condition, and modifies the project node in-place to ensure # every column in the project's inputs is available to the current node. substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper( - node.columns, - {node.condition}, - project, - node.default_input_aliases[input_index], - ) + pull_project_helper(project, node.default_input_aliases[input_index]) ) # Apply the substitutions to the join's condition and output columns. @@ -286,7 +249,7 @@ def pull_project_into_filter(node: Filter) -> None: # columns or condition, and modifies the project node in-place to ensure # every column in the project's inputs is available to the current node. substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper(node.columns, {node.condition}, node.input, None) + pull_project_helper(node.input, None) ) # Apply the substitutions to the filter's condition and output columns. @@ -316,12 +279,7 @@ def pull_project_into_limit(node: Limit) -> None: # columns or orderings, and modifies the project node in-place to ensure # every column in the project's inputs is available to the current node. 
substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper( - node.columns, - {order_expr.expr for order_expr in node.orderings}, - node.input, - None, - ) + pull_project_helper(node.input, None) ) # Apply the substitutions to the limit's orderings and output columns. @@ -533,9 +491,7 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: # node in-place to ensure every column in the project's inputs is available # to the current node. substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper( - dict(node.aggregations.items()), set(node.keys.values()), node.input, None - ) + pull_project_helper(node.input, None) ) # Build up the columns of a new project that points to all of the output diff --git a/tests/test_plan_refsols/agg_simplification_1.txt b/tests/test_plan_refsols/agg_simplification_1.txt index 014c1dbdf..13689d041 100644 --- a/tests/test_plan_refsols/agg_simplification_1.txt +++ b/tests/test_plan_refsols/agg_simplification_1.txt @@ -1,4 +1,3 @@ ROOT(columns=[('aug_exchange', aug_exchange), ('su1', DEFAULT_TO(count_one, 0:numeric)), ('su2', DEFAULT_TO(count_one * 2:numeric, 0:numeric)), ('su3', DEFAULT_TO(count_one * -1:numeric, 0:numeric)), ('su4', DEFAULT_TO(count_one * -3:numeric, 0:numeric)), ('su5', DEFAULT_TO(0:numeric, 0:numeric)), ('su6', DEFAULT_TO(count_one * 0.5:numeric, 0:numeric)), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), 
('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) - AGGREGATE(keys={'aug_exchange': aug_exchange}, aggregations={'agg_63': QUANTILE(aug_exchange, 0.8:numeric), 'count_one': COUNT()}) - PROJECT(columns={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string))}) - SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) + AGGREGATE(keys={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string))}, aggregations={'agg_63': QUANTILE(LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string)), 0.8:numeric), 'count_one': COUNT()}) + SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index d5af6cd67..d9d5391ca 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,15 +1,14 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_custkey):asc_first]) - 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t0.n_rows}) - LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows_1, 0:numeric))], orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) + LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows_1}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 
'n_rows_1': t1.n_rows}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index f42e5981e..5cd9dcaf3 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) - LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': 
c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) + LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) FILTER(condition=sum_agg_3 > 0:numeric, columns={'n_rows': n_rows, 
'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_agg_3': SUM(agg_3)}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_3': t1.agg_3, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 0235aa2fa..cbac8d607 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,25 +1,24 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_agg_11, 0:numeric))], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': 
t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': 
NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_agg_11, 0:numeric))], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': 
t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': 
NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 056f09af0..07efb3875 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,27 +1,26 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': 
sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - 
JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', 
DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_small_parts': sum_sum_agg_5, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_n_rows': sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': 
t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 9d8b97da5..b061c0777 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,16 +1,15 @@ -ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', most_recent_order_total), ('most_recent_order_distinct', most_recent_order_distinct)], orderings=[(c_name):asc_first]) - FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate}) - PROJECT(columns={'c_name': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate': o_orderdate}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - 
JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_suppkey': NDISTINCT(l_suppkey)}) - FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) +ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', DEFAULT_TO(n_rows, 0:numeric)), ('most_recent_order_distinct', DEFAULT_TO(ndistinct_l_suppkey, 0:numeric))], orderings=[(c_name):asc_first]) + FILTER(condition=DEFAULT_TO(ndistinct_l_suppkey, 0:numeric) < DEFAULT_TO(n_rows, 0:numeric), columns={'c_name': c_name, 'n_rows': n_rows, 'ndistinct_l_suppkey': ndistinct_l_suppkey, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 
'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_suppkey': NDISTINCT(l_suppkey)}) + FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index 0afce8592..5ecd14c7e 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,13 +1,10 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orders_in_range)], orderings=[(year_7):asc_first, (month_6):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'month_6': ANYTHING(month), 'n_orders_in_range': COUNT(), 'year_7': ANYTHING(year)}) FILTER(condition=MONOTONIC(prev_month_avg_price, o_totalprice, 
avg_o_totalprice) | MONOTONIC(avg_o_totalprice, o_totalprice, prev_month_avg_price), columns={'month': month, 'year': year}) - JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) + JOIN(condition=t0.month == MONTH(t1.o_orderdate) & t0.year == YEAR(t1.o_orderdate), type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) - AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) - FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) - FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) + FILTER(condition=YEAR(o_orderdate) < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + 
FILTER(condition=YEAR(o_orderdate) < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 41ba7be61..740b284ba 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,11 +1,11 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) + JOIN(condition=t0.year == t1.year_1, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year_1}) FILTER(condition=DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric) > next_year_total_spent, columns={'year': year}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 
'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year_1': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index d185fd519..c2708cd55 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) - FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) - PROJECT(columns={'n_name': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, 
columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) +ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) + FILTER(condition=DEFAULT_TO(count_s_suppkey, 0:numeric) > 0.5:numeric * total_suppliers, columns={'count_s_suppkey': count_s_suppkey, 'n_name': n_name, 'total_suppliers': total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 6c6ea66d7..9f0a06ac0 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,19 +1,17 @@ 
-ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', n_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) - FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 
'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) +ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', DEFAULT_TO(sum_n_ticker_type_trans_1, 0:numeric)), ('n_type_trans', DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric))], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) + FILTER(condition=n_ticker_type_trans / DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) < 0.2:numeric, columns={'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': sum_n_ticker_type_trans_1}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 
'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_type_trans / DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 0.8:numeric, columns={'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index a4d8aee87..15f3a6a3e 100644 --- 
a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last], limit=5:numeric) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_size': t0.p_size, 'retail_price_int': t0.retail_price_int}) - LIMIT(limit=2:numeric, columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) +ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', INTEGER(p_retailprice)), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last], limit=5:numeric) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_retailprice': t0.p_retailprice, 'p_size': t0.p_size}) + LIMIT(limit=2:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}, orderings=[(INTEGER(p_retailprice)):asc_first]) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': 
p_retailprice, 'p_size': p_size}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/simple_cross_5.txt b/tests/test_plan_refsols/simple_cross_5.txt index c3fd447c1..a0e6fdd19 100644 --- a/tests/test_plan_refsols/simple_cross_5.txt +++ b/tests/test_plan_refsols/simple_cross_5.txt @@ -4,8 +4,8 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), ( AGGREGATE(keys={'p_size': p_size}, aggregations={}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_size': p_size}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_size': p_size}) - FILTER(condition=RANKING(args=[], partition=[anything_p_size], order=[(total_qty):desc_first], allow_ties=False) == 1:numeric, columns={'anything_p_size': anything_p_size, 'o_orderpriority': o_orderpriority, 'total_qty': total_qty}) - PROJECT(columns={'anything_p_size': anything_p_size, 'o_orderpriority': o_orderpriority, 'total_qty': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + PROJECT(columns={'anything_p_size': anything_p_size, 'o_orderpriority': o_orderpriority, 'total_qty': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + FILTER(condition=RANKING(args=[], partition=[anything_p_size], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'anything_p_size': anything_p_size, 'o_orderpriority': o_orderpriority, 'sum_l_quantity': sum_l_quantity}) AGGREGATE(keys={'o_orderpriority': o_orderpriority, 'p_size': p_size}, aggregations={'anything_p_size': ANYTHING(p_size), 'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.l_partkey == t1.p_partkey & t1.p_size == t0.p_size, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': t0.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': 
t1.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index f03df0318..8195645e6 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -2,8 +2,8 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_o JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(n_rows, 0:numeric)):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index f0f4f0fb3..e759b898f 100644 --- 
a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -1,15 +1,14 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', quantity), ('n_shipments', n_rows)], orderings=[(quantity):desc_last, (s_name):asc_first], limit=3:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 'quantity': t1.quantity, 's_name': t0.s_name}) +ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('n_shipments', n_rows)], orderings=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (s_name):asc_first], limit=3:numeric) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) - PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == 
t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows_1': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows_1': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': 
l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 0bb678bcf..694b21b3a 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,10 +1,9 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) - AGGREGATE(keys={'month': MONTH(ca_dt), 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) 
JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index 45394da84..9413f086a 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,15 +1,14 @@ -ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) - FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) - PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) - AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': 
t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) +ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * DEFAULT_TO(sum_expr_3, 0:numeric) - PREV(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', DEFAULT_TO(sum_expr_3, 0:numeric)), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) + FILTER(condition=DEFAULT_TO(sum_expr_3, 0:numeric) > 0:numeric, columns={'sum_expr_3': sum_expr_3, 'sum_n_rows': sum_n_rows, 'year': year}) + AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': 
t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 8b4fb4c9e..411981af0 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,18 +1,17 @@ -ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last], limit=10:numeric) - FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) - PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey, 'sum_metric': sum_metric}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) - AGGREGATE(keys={}, aggregations={'sum_metric': SUM(ps_supplycost * ps_availqty)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': 
t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(ps_supplycost * ps_availqty)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', DEFAULT_TO(sum_expr_2, 0:numeric))], orderings=[(DEFAULT_TO(sum_expr_2, 0:numeric)):desc_last], limit=10:numeric) + FILTER(condition=DEFAULT_TO(sum_expr_2, 0:numeric) > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'ps_partkey': ps_partkey, 'sum_expr_2': sum_expr_2}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) + AGGREGATE(keys={}, aggregations={'sum_metric': SUM(ps_supplycost * ps_availqty)}) + JOIN(condition=t0.ps_suppkey == 
t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(ps_supplycost * ps_availqty)}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index ff041d5aa..8acc50868 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first], limit=10:numeric) - 
FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) +ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first], limit=10:numeric) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + FILTER(condition=DEFAULT_TO(sum_l_quantity, 0:numeric) > 300:numeric, columns={'l_orderkey': l_orderkey, 'sum_l_quantity': sum_l_quantity}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 60e4e77d6..6d11fb64e 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,14 +1,13 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) - AGGREGATE(keys={'cntry_code': cntry_code}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'cntry_code': cntry_code}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'cntry_code': t0.cntry_code, 'n_rows': t1.n_rows}) - FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': cntry_code}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) - FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) - JOIN(condition=True:bool, 
type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) - AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) - FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + AGGREGATE(keys={'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) + FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) + AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) + FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + FILTER(condition=ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql index cafd78875..6ac06680b 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql @@ -9,7 +9,7 @@ WITH _s0 AS ( GROUP BY DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)), customer_id -), _t2 AS ( +), _t1 AS ( SELECT SUM(_s0.sum_sale_price) AS sum_sum_sale_price, _s0.quarter, @@ -25,7 +25,7 @@ SELECT quarter, state AS customer_state, COALESCE(sum_sum_sale_price, 0) AS total_sales -FROM _t2 +FROM _t1 WHERE NOT sum_sum_sale_price IS NULL AND sum_sum_sale_price > 0 ORDER BY diff --git a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql index d9d271f6e..96ad10d92 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql @@ -25,7 +25,7 @@ WITH _s0 AS ( ) AS TEXT) || ' months' ), customer_id -), _t2 AS ( +), _t1 AS ( SELECT SUM(_s0.sum_sale_price) AS sum_sum_sale_price, _s0.quarter, @@ -41,7 +41,7 @@ SELECT quarter, state AS customer_state, COALESCE(sum_sum_sale_price, 0) AS total_sales -FROM _t2 +FROM _t1 WHERE NOT sum_sum_sale_price IS NULL AND sum_sum_sale_price > 0 ORDER BY diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index 1fd157a20..07f334d4d 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -1,10 +1,10 @@ -WITH _t3 AS ( +WITH _t2 AS ( SELECT ca_dt FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 
2021) -), _t6 AS ( +), _t5 AS ( SELECT co_id, co_name @@ -14,38 +14,38 @@ WITH _t3 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t5.ca_dt - FROM _t3 AS _t5 + _t4.ca_dt + FROM _t2 AS _t4 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATE_ADD(CAST(_t5.ca_dt AS TIMESTAMP), -6, 'MONTH') + ON calendar.ca_dt >= DATE_ADD(CAST(_t4.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t6 AS _t6 - ON _t6.co_id = devices.de_production_country_id + JOIN _t5 AS _t5 + ON _t5.co_id = devices.de_production_country_id GROUP BY - _t5.ca_dt + _t4.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t8.ca_dt - FROM _t3 AS _t8 + _t7.ca_dt + FROM _t2 AS _t7 JOIN main.incidents AS incidents - ON _t8.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + ON _t7.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t6 AS _t9 - ON _t9.co_id = devices.de_production_country_id + JOIN _t5 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _t8.ca_dt + _t7.ca_dt ) SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t2.ca_dt AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))), ( + WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t2.ca_dt AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t2.ca_dt AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t2.ca_dt AS DATETIME))), ( 2 * -1 )) END @@ -53,13 +53,13 @@ SELECT ROUND(( 1000000.0 * COALESCE(SUM(_s15.n_rows), 0) ) / COALESCE(SUM(_s7.n_rows), 0), 2) AS ir -FROM _t3 AS _t3 +FROM _t2 AS _t2 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t3.ca_dt + ON _s7.ca_dt = 
_t2.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t3.ca_dt + ON _s15.ca_dt = _t2.ca_dt GROUP BY - EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), - EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)) + EXTRACT(MONTH FROM CAST(_t2.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t2.ca_dt AS DATETIME)) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index b30eff167..221bed4db 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -1,10 +1,10 @@ -WITH _t3 AS ( +WITH _t2 AS ( SELECT ca_dt FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t6 AS ( +), _t5 AS ( SELECT co_id, co_name @@ -14,38 +14,38 @@ WITH _t3 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t5.ca_dt - FROM _t3 AS _t5 + _t4.ca_dt + FROM _t2 AS _t4 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATETIME(_t5.ca_dt, '-6 month') + ON calendar.ca_dt >= DATETIME(_t4.ca_dt, '-6 month') JOIN main.devices AS devices ON calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t6 AS _t6 - ON _t6.co_id = devices.de_production_country_id + JOIN _t5 AS _t5 + ON _t5.co_id = devices.de_production_country_id GROUP BY - _t5.ca_dt + _t4.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t8.ca_dt - FROM _t3 AS _t8 + _t7.ca_dt + FROM _t2 AS _t7 JOIN main.incidents AS incidents - ON _t8.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') + ON _t7.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t6 AS _t9 - ON _t9.co_id = devices.de_production_country_id + JOIN _t5 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _t8.ca_dt + _t7.ca_dt ) SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t2.ca_dt) AS INTEGER), 
CASE - WHEN LENGTH(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', _t2.ca_dt) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', _t2.ca_dt) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t2.ca_dt) AS INTEGER), ( 2 * -1 )) END @@ -56,13 +56,13 @@ SELECT ) AS REAL) / COALESCE(SUM(_s7.n_rows), 0), 2 ) AS ir -FROM _t3 AS _t3 +FROM _t2 AS _t2 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t3.ca_dt + ON _s7.ca_dt = _t2.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t3.ca_dt + ON _s15.ca_dt = _t2.ca_dt GROUP BY - CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), - CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER) + CAST(STRFTIME('%Y', _t2.ca_dt) AS INTEGER), + CAST(STRFTIME('%m', _t2.ca_dt) AS INTEGER) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql index 1f40c54cb..461f9fb2c 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql @@ -20,7 +20,7 @@ WITH _s2 AS ( ON _s4.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) GROUP BY _s4.ca_dt -), _t2 AS ( +), _t1 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, @@ -57,7 +57,7 @@ SELECT ) AS pct_incident_change, COALESCE(sum_expr_3, 0) AS bought, COALESCE(sum_n_rows, 0) AS incidents -FROM _t2 +FROM _t1 WHERE NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ORDER BY diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql index 53b91a35c..d0661f3f0 100644 --- 
a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql @@ -20,7 +20,7 @@ WITH _s2 AS ( ON _s4.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') GROUP BY _s4.ca_dt -), _t2 AS ( +), _t1 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, @@ -57,7 +57,7 @@ SELECT ) AS pct_incident_change, COALESCE(sum_expr_3, 0) AS bought, COALESCE(sum_n_rows, 0) AS incidents -FROM _t2 +FROM _t1 WHERE NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index 212223900..3f7009ea0 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -39,5 +39,5 @@ JOIN _s9 AS _s9 COALESCE(_s8.sum_metric, 0) * 0.0001 ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY - value DESC + COALESCE(_s9.sum_expr_2, 0) DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index 212223900..3f7009ea0 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -39,5 +39,5 @@ JOIN _s9 AS _s9 COALESCE(_s8.sum_metric, 0) * 0.0001 ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY - value DESC + COALESCE(_s9.sum_expr_2, 0) DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q18_ansi.sql b/tests/test_sql_refsols/tpch_q18_ansi.sql index 575feb447..aa9134752 100644 --- a/tests/test_sql_refsols/tpch_q18_ansi.sql +++ b/tests/test_sql_refsols/tpch_q18_ansi.sql @@ -1,4 +1,4 @@ -WITH _s3 AS ( +WITH _t1 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS O_TOTALPRICE, - COALESCE(_s3.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS 
customer ON customer.c_custkey = orders.o_custkey -JOIN _s3 AS _s3 - ON NOT _s3.sum_l_quantity IS NULL - AND _s3.l_orderkey = orders.o_orderkey - AND _s3.sum_l_quantity > 300 +JOIN _t1 AS _t1 + ON NOT _t1.sum_l_quantity IS NULL + AND _t1.l_orderkey = orders.o_orderkey + AND _t1.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate diff --git a/tests/test_sql_refsols/tpch_q18_sqlite.sql b/tests/test_sql_refsols/tpch_q18_sqlite.sql index 575feb447..aa9134752 100644 --- a/tests/test_sql_refsols/tpch_q18_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q18_sqlite.sql @@ -1,4 +1,4 @@ -WITH _s3 AS ( +WITH _t1 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS O_TOTALPRICE, - COALESCE(_s3.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS customer ON customer.c_custkey = orders.o_custkey -JOIN _s3 AS _s3 - ON NOT _s3.sum_l_quantity IS NULL - AND _s3.l_orderkey = orders.o_orderkey - AND _s3.sum_l_quantity > 300 +JOIN _t1 AS _t1 + ON NOT _t1.sum_l_quantity IS NULL + AND _t1.l_orderkey = orders.o_orderkey + AND _t1.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate From 60be207cff381583392419c6b435778f2f21b7c9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 17:10:25 -0400 Subject: [PATCH 060/143] [RUN CI] From ade8f35b514f526102a7f8a2ea1964d032be15f0 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 18:08:54 -0400 Subject: [PATCH 061/143] Adding more simplification patterns and tests --- pydough/conversion/relational_converter.py | 16 +- .../conversion/relational_simplification.py | 183 ++++++++++++++++-- pydough/relational/rel_util.py | 22 ++- tests/test_pipeline_defog_custom.py | 122 ++++++++++++ .../agg_simplification_1.txt | 2 +- tests/test_plan_refsols/aggregate_anti.txt | 2 +- 
tests/test_plan_refsols/anti_aggregate.txt | 2 +- .../anti_aggregate_alternate.txt | 2 +- tests/test_plan_refsols/correl_7.txt | 2 +- tests/test_plan_refsols/simplification_2.txt | 2 + tests/test_plan_refsols/simplification_3.txt | 3 + .../agg_simplification_1_ansi.sql | 2 +- .../agg_simplification_1_sqlite.sql | 2 +- .../simplification_2_ansi.sql | 41 ++++ .../simplification_2_sqlite.sql | 41 ++++ .../simplification_3_ansi.sql | 12 ++ .../simplification_3_sqlite.sql | 12 ++ 17 files changed, 438 insertions(+), 30 deletions(-) create mode 100644 tests/test_plan_refsols/simplification_2.txt create mode 100644 tests/test_plan_refsols/simplification_3.txt create mode 100644 tests/test_sql_refsols/simplification_2_ansi.sql create mode 100644 tests/test_sql_refsols/simplification_2_sqlite.sql create mode 100644 tests/test_sql_refsols/simplification_3_ansi.sql create mode 100644 tests/test_sql_refsols/simplification_3_sqlite.sql diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 71aaeab4e..0caf6c7e0 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1448,22 +1448,24 @@ def optimize_relational_tree( # possible. root = bubble_column_names(root) - # Step 8: run projection pullup followed by column pruning 2x. + # Step 8: the following pipeline twice: + # A: projection pullup + # B: simplification + # C: filter pushdown + # D: column pruning for _ in range(2): root = confirm_root(pullup_projections(root)) simplify_expressions(root) + root._input = push_filters(root.input, set()) root = ColumnPruner().prune_unused_columns(root) - # Step 9: re-run filter pushdown - root._input = push_filters(root.input, set()) - - # Step 10: re-run projection merging, without pushing into joins. + # Step 9: re-run projection merging, without pushing into joins. 
root = confirm_root(merge_projects(root, push_into_joins=False)) - # Step 11: re-run column bubbling + # Step 10: re-run column bubbling root = bubble_column_names(root) - # Step 12: re-run column pruning. + # Step 11: re-run column pruning. root = ColumnPruner().prune_unused_columns(root) return root diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 70e8ef858..6d43d4e0e 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -153,6 +153,12 @@ def simplify_function_call( ): output_predicates.add(LogicalPredicate.NOT_NULL) case pydop.DEFAULT_TO: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and expr.inputs[0].value is None + ): + output_expr = expr.inputs[1] + output_predicates = arg_predicates[1] if LogicalPredicate.NOT_NULL in arg_predicates[0]: output_expr = expr.inputs[0] output_predicates = arg_predicates[0] @@ -171,22 +177,140 @@ def simplify_function_call( output_predicates = arg_predicates[0] else: output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - case ( - pydop.LENGTH - | pydop.BAN - | pydop.BOR - | pydop.BXR - | pydop.STARTSWITH - | pydop.ENDSWITH - | pydop.CONTAINS - | pydop.LIKE - | pydop.SQRT - | pydop.MONOTONIC - ): + case pydop.LENGTH: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + str_len: int = len(expr.inputs[0].value) + output_expr = LiteralExpression(str_len, expr.data_type) + if str_len > 0: + output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.LOWER: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + output_expr = LiteralExpression( + expr.inputs[0].value.lower(), expr.data_type + ) + case pydop.UPPER: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + output_expr = LiteralExpression( 
+ expr.inputs[0].value.upper(), expr.data_type + ) + case pydop.STARTSWITH: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[0].value.startswith(expr.inputs[1].value), + expr.data_type, + ) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.ENDSWITH: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[0].value.endswith(expr.inputs[1].value), expr.data_type + ) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.CONTAINS: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[1].value in expr.inputs[0].value, expr.data_type + ) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.SQRT: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, (int, float)) + and expr.inputs[0].value >= 0 + ): + sqrt_value: float = expr.inputs[0].value ** 0.5 + output_expr = LiteralExpression(sqrt_value, expr.data_type) + if sqrt_value > 0: + output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.MONOTONIC: + v0: int | float | None = None + v1: int | float | None = None + v2: int | float | None = None + monotonic_result: bool + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, (int, float) + ): + v0 = expr.inputs[0].value + if isinstance(expr.inputs[1], LiteralExpression) and isinstance( + expr.inputs[1].value, (int, 
float) + ): + v1 = expr.inputs[1].value + if isinstance(expr.inputs[2], LiteralExpression) and isinstance( + expr.inputs[2].value, (int, float) + ): + v2 = expr.inputs[2].value + if v0 is not None and v1 is not None and v2 is not None: + monotonic_result = (v0 <= v1) and (v1 <= v2) + output_expr = LiteralExpression(monotonic_result, expr.data_type) + if monotonic_result: + output_predicates.add(LogicalPredicate.POSITIVE) + elif v0 is not None and v1 is not None: + if v0 <= v1: + output_expr = CallExpression( + pydop.LEQ, expr.data_type, expr.inputs[1:] + ) + else: + output_expr = LiteralExpression(False, expr.data_type) + elif v1 is not None and v2 is not None: + if v1 <= v2: + output_expr = CallExpression( + pydop.LEQ, expr.data_type, expr.inputs[:2] + ) + else: + output_expr = LiteralExpression(False, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.BXR | pydop.LIKE: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.BAN: + if any( + isinstance(arg, LiteralExpression) and arg.value in [0, False, None] + for arg in expr.inputs + ): + output_expr = LiteralExpression(False, expr.data_type) + if all( + isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] + for arg in expr.inputs + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.BOR: + if any( + isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] + for arg in expr.inputs + ): + output_expr = LiteralExpression(True, expr.data_type) + if all( + isinstance(arg, LiteralExpression) and arg.value in [0, False, None] + for arg in expr.inputs + ): + output_expr = LiteralExpression(False, expr.data_type) output_predicates.add(LogicalPredicate.NOT_NEGATIVE) case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: - match (expr.op, expr.inputs[1]): - case (pydop.GRT, LiteralExpression()) if ( + match (expr.inputs[0], expr.op, 
expr.inputs[1]): + case (_, pydop.GRT, LiteralExpression()) if ( expr.inputs[1].value == 0 and LogicalPredicate.POSITIVE in arg_predicates[0] ): @@ -194,7 +318,7 @@ def simplify_function_call( output_predicates.add(LogicalPredicate.NOT_NULL) output_predicates.add(LogicalPredicate.NOT_NEGATIVE) output_predicates.add(LogicalPredicate.POSITIVE) - case (pydop.GEQ, LiteralExpression()) if ( + case (_, pydop.GEQ, LiteralExpression()) if ( expr.inputs[1].value == 0 and LogicalPredicate.NOT_NEGATIVE in arg_predicates[0] ): @@ -202,6 +326,35 @@ def simplify_function_call( output_predicates.add(LogicalPredicate.NOT_NULL) output_predicates.add(LogicalPredicate.NOT_NEGATIVE) output_predicates.add(LogicalPredicate.POSITIVE) + case (LiteralExpression(), _, LiteralExpression()): + match ( + expr.inputs[0].value, + expr.inputs[1].value, + expr.op, + ): + case (None, _, _) | (_, None, _): + output_expr = LiteralExpression(None, expr.data_type) + case (x, y, pydop.EQU): + output_expr = LiteralExpression(x == y, expr.data_type) + case (x, y, pydop.NEQ): + output_expr = LiteralExpression(x != y, expr.data_type) + case (x, y, pydop.LET) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x < y, expr.data_type) # type: ignore + case (x, y, pydop.LEQ) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x <= y, expr.data_type) # type: ignore + case (x, y, pydop.GRT) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x > y, expr.data_type) # type: ignore + case (x, y, pydop.GEQ) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x >= y, expr.data_type) # type: ignore + case _: pass output_predicates.add(LogicalPredicate.NOT_NEGATIVE) diff --git a/pydough/relational/rel_util.py 
b/pydough/relational/rel_util.py index 91e5919f9..12255731c 100644 --- a/pydough/relational/rel_util.py +++ b/pydough/relational/rel_util.py @@ -54,6 +54,15 @@ pydop.STARTSWITH, pydop.ENDSWITH, pydop.CONTAINS, + pydop.REPLACE, + pydop.FIND, + pydop.GETPART, + pydop.LPAD, + pydop.RPAD, + pydop.STRCOUNT, + pydop.INTEGER, + pydop.FLOAT, + pydop.STRING, pydop.LIKE, pydop.LOWER, pydop.UPPER, @@ -72,10 +81,21 @@ pydop.SUB, pydop.MUL, pydop.DIV, + pydop.ABS, + pydop.FLOOR, + pydop.LARGEST, + pydop.SMALLEST, + pydop.CEIL, + pydop.MONOTONIC, + pydop.POW, + pydop.POWER, + pydop.SQRT, + pydop.ROUND, + pydop.SLICE, } """ A set of operators with the property that the output is null if any of the -inputs are null. +column inputs are null. """ diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index 9f0a587c0..58265f72b 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -1671,6 +1671,128 @@ def get_day_of_week( ), id="simplification_1", ), + pytest.param( + PyDoughPandasTest( + "result = Broker.CALCULATE(" + " s00 = DEFAULT_TO(None, 0) == 0," # -> True + " s01 = DEFAULT_TO(None, 0) != 0," # -> False + " s02 = DEFAULT_TO(None, 0) >= 0," # -> True + " s03 = DEFAULT_TO(None, 0) > 0," # -> False + " s04 = DEFAULT_TO(None, 0) <= 0," # -> True + " s05 = DEFAULT_TO(None, 0) < 0," # -> False + " s06 = DEFAULT_TO(None, 0) == None," # -> None + " s07 = DEFAULT_TO(None, 0) != None," # -> None + " s08 = DEFAULT_TO(None, 0) >= None," # -> None + " s09 = DEFAULT_TO(None, 0) > None," # -> None + " s10 = DEFAULT_TO(None, 0) <= None," # -> None + " s11 = DEFAULT_TO(None, 0) < None," # -> None + " s12 = DEFAULT_TO(None, 'ab') == 'cd'," # -> False + " s13 = DEFAULT_TO(None, 'ab') != 'cd'," # -> True + " s14 = DEFAULT_TO(None, 'ab') >= 'cd'," # -> False + " s15 = DEFAULT_TO(None, 'ab') > 'cd'," # -> False + " s16 = DEFAULT_TO(None, 'ab') <= 'cd'," # -> True + " s17 = DEFAULT_TO(None, 'ab') < 'cd'," # -> True + " s18 = 
True | (COUNT(customers) > 10)," # -> True + " s19 = False & (COUNT(customers) > 10)," # -> False + " s20 = False | (LENGTH('foo') > 0)," # -> True + " s21 = False | (LENGTH('foo') < 0)," # -> False + " s22 = True & (LENGTH('foo') > 0)," # -> True + " s23 = True & (LENGTH('foo') < 0)," # -> False + " s24 = STARTSWITH('a', 'abc')," # -> False + " s25 = STARTSWITH('abc', 'a')," # -> True + " s26 = ENDSWITH('abc', 'c')," # -> True + " s27 = ENDSWITH('abc', 'ab')," # -> False + " s28 = CONTAINS('abc', 'b')," # -> True + " s29 = CONTAINS('abc', 'B')," # -> False + " s30 = LENGTH('alphabet')," # -> 8 + " s31 = LOWER('AlPhAbEt')," # -> 'alphabet' + " s32 = UPPER('sOuP')," # -> 'SOUP' + " s33 = True == True," # -> True + " s34 = True != True," # -> False + " s35 = True == False," # -> False + " s36 = True != False," # -> True + " s37 = SQRT(9)," # -> 3.0 + ")", + "Broker", + lambda: pd.DataFrame( + { + "s00": [1], + "s01": [0], + "s02": [1], + "s03": [0], + "s04": [1], + "s05": [0], + "s06": [None], + "s07": [None], + "s08": [None], + "s09": [None], + "s10": [None], + "s11": [None], + "s12": [0], + "s13": [1], + "s14": [0], + "s15": [0], + "s16": [1], + "s17": [1], + "s18": [1], + "s19": [0], + "s20": [1], + "s21": [0], + "s22": [1], + "s23": [0], + "s24": [0], + "s25": [1], + "s26": [1], + "s27": [0], + "s28": [1], + "s29": [0], + "s30": [8], + "s31": ["alphabet"], + "s32": ["SOUP"], + "s33": [1], + "s34": [0], + "s35": [0], + "s36": [1], + "s37": [3.0], + } + ), + "simplification_2", + ), + id="simplification_2", + ), + pytest.param( + PyDoughPandasTest( + "result = Broker.CALCULATE(" + " s00 = MONOTONIC(1, 2, 3)," # -> True + " s01 = MONOTONIC(1, 1, 1)," # -> True + " s02 = MONOTONIC(1, 0, 3)," # -> False + " s03 = MONOTONIC(1, 4, 3)," # -> False + " s04 = MONOTONIC(1, 2, 1)," # -> False + " s05 = MONOTONIC(1, 0, 1)," # -> False + " s06 = MONOTONIC(1, LENGTH('foo'), COUNT(customers))," # -> 3 <= COUNT(customers) + " s07 = MONOTONIC(10, LENGTH('foo'), COUNT(customers))," 
# False + " s08 = MONOTONIC(COUNT(customers), LENGTH('foobar'), 9)," # -> COUNT(customers) <= 6 + " s09 = MONOTONIC(COUNT(customers), LENGTH('foobar'), 5)," # -> False + ")", + "Broker", + lambda: pd.DataFrame( + { + "s00": [1], + "s01": [1], + "s02": [0], + "s03": [0], + "s04": [0], + "s05": [0], + "s06": [1], + "s07": [0], + "s08": [0], + "s09": [0], + } + ), + "simplification_3", + ), + id="simplification_3", + ), ], ) def defog_custom_pipeline_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_plan_refsols/agg_simplification_1.txt b/tests/test_plan_refsols/agg_simplification_1.txt index 03716b635..e067b9553 100644 --- a/tests/test_plan_refsols/agg_simplification_1.txt +++ b/tests/test_plan_refsols/agg_simplification_1.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('aug_exchange', aug_exchange), ('su1', count_one), ('su2', count_one * 2:numeric), ('su3', count_one * -1:numeric), ('su4', count_one * -3:numeric), ('su5', 0:numeric), ('su6', count_one * 0.5:numeric), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 
2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) +ROOT(columns=[('aug_exchange', aug_exchange), ('su1', count_one), ('su2', count_one * 2:numeric), ('su3', count_one * -1:numeric), ('su4', count_one * -3:numeric), ('su5', 0:numeric), ('su6', count_one * 0.5:numeric), ('su7', 0:numeric), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', 
None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) AGGREGATE(keys={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string))}, aggregations={'agg_63': QUANTILE(LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string)), 0.8:numeric), 'count_one': COUNT()}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_plan_refsols/aggregate_anti.txt b/tests/test_plan_refsols/aggregate_anti.txt index e1482682d..02458ffdc 100644 --- a/tests/test_plan_refsols/aggregate_anti.txt +++ b/tests/test_plan_refsols/aggregate_anti.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) +ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate.txt b/tests/test_plan_refsols/anti_aggregate.txt index e1482682d..02458ffdc 100644 --- a/tests/test_plan_refsols/anti_aggregate.txt +++ b/tests/test_plan_refsols/anti_aggregate.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) +ROOT(columns=[('name', s_name), ('num_10parts', 
0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate_alternate.txt b/tests/test_plan_refsols/anti_aggregate_alternate.txt index af1852c80..598407ce0 100644 --- a/tests/test_plan_refsols/anti_aggregate_alternate.txt +++ b/tests/test_plan_refsols/anti_aggregate_alternate.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('sum_price_of_10parts', None:unknown)], orderings=[]) +ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', 0:numeric), ('sum_price_of_10parts', None:unknown)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_7.txt b/tests/test_plan_refsols/correl_7.txt index 6ba1a011d..a9c40b49d 100644 --- a/tests/test_plan_refsols/correl_7.txt +++ b/tests/test_plan_refsols/correl_7.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', r_name), ('n_prefix_nations', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) +ROOT(columns=[('name', r_name), ('n_prefix_nations', 0:numeric)], orderings=[]) JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': 
r_regionkey}) JOIN(condition=SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) diff --git a/tests/test_plan_refsols/simplification_2.txt b/tests/test_plan_refsols/simplification_2.txt new file mode 100644 index 000000000..4e9433c17 --- /dev/null +++ b/tests/test_plan_refsols/simplification_2.txt @@ -0,0 +1,2 @@ +ROOT(columns=[('s00', True:bool), ('s01', False:bool), ('s02', True:bool), ('s03', False:bool), ('s04', True:bool), ('s05', False:bool), ('s06', None:bool), ('s07', None:bool), ('s08', None:bool), ('s09', None:bool), ('s10', None:bool), ('s11', None:bool), ('s12', False:bool), ('s13', True:bool), ('s14', False:bool), ('s15', False:bool), ('s16', True:bool), ('s17', True:bool), ('s18', True:bool), ('s19', False:bool), ('s20', True:bool), ('s21', False:bool), ('s22', True:bool), ('s23', False:bool), ('s24', False:bool), ('s25', True:bool), ('s26', True:bool), ('s27', False:bool), ('s28', True:bool), ('s29', False:bool), ('s30', 8:numeric), ('s31', 'alphabet':string), ('s32', 'SOUP':string), ('s33', True:bool), ('s34', False:bool), ('s35', False:bool), ('s36', True:bool), ('s37', 3.0:numeric)], orderings=[]) + EMPTYSINGLETON() diff --git a/tests/test_plan_refsols/simplification_3.txt b/tests/test_plan_refsols/simplification_3.txt new file mode 100644 index 000000000..8078734b7 --- /dev/null +++ b/tests/test_plan_refsols/simplification_3.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('s00', True:bool), ('s01', True:bool), ('s02', False:bool), ('s03', False:bool), ('s04', False:bool), ('s05', False:bool), ('s06', 3:numeric <= n_rows), ('s07', False:bool), ('s08', n_rows <= 6:numeric), ('s09', False:bool)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.sbCustomer, columns={}) diff --git a/tests/test_sql_refsols/agg_simplification_1_ansi.sql 
b/tests/test_sql_refsols/agg_simplification_1_ansi.sql index 1807f6d2e..49b80edfc 100644 --- a/tests/test_sql_refsols/agg_simplification_1_ansi.sql +++ b/tests/test_sql_refsols/agg_simplification_1_ansi.sql @@ -6,7 +6,7 @@ SELECT COUNT(*) * -3 AS su4, 0 AS su5, COUNT(*) * 0.5 AS su6, - COALESCE(NULL, 0) AS su7, + 0 AS su7, COALESCE( LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), 0 diff --git a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql index 515f11664..4ebaf317c 100644 --- a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql +++ b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql @@ -159,7 +159,7 @@ SELECT COUNT(*) * -3 AS su4, 0 AS su5, COUNT(*) * 0.5 AS su6, - COALESCE(NULL, 0) AS su7, + 0 AS su7, COALESCE( LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), 0 diff --git a/tests/test_sql_refsols/simplification_2_ansi.sql b/tests/test_sql_refsols/simplification_2_ansi.sql new file mode 100644 index 000000000..237dabec3 --- /dev/null +++ b/tests/test_sql_refsols/simplification_2_ansi.sql @@ -0,0 +1,41 @@ +SELECT + TRUE AS s00, + FALSE AS s01, + TRUE AS s02, + FALSE AS s03, + TRUE AS s04, + FALSE AS s05, + NULL AS s06, + NULL AS s07, + NULL AS s08, + NULL AS s09, + NULL AS s10, + NULL AS s11, + FALSE AS s12, + TRUE AS s13, + FALSE AS s14, + FALSE AS s15, + TRUE AS s16, + TRUE AS s17, + TRUE AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + FALSE AS s24, + TRUE AS s25, + TRUE AS s26, + FALSE AS s27, + TRUE AS s28, + FALSE AS s29, + 8 AS s30, + 'alphabet' AS s31, + 'SOUP' AS s32, + TRUE AS s33, + FALSE AS s34, + FALSE AS s35, + TRUE AS s36, + 3.0 AS s37 +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/simplification_2_sqlite.sql b/tests/test_sql_refsols/simplification_2_sqlite.sql new file mode 100644 index 000000000..b5d5d71f5 --- /dev/null +++ 
b/tests/test_sql_refsols/simplification_2_sqlite.sql @@ -0,0 +1,41 @@ +SELECT + TRUE AS s00, + FALSE AS s01, + TRUE AS s02, + FALSE AS s03, + TRUE AS s04, + FALSE AS s05, + NULL AS s06, + NULL AS s07, + NULL AS s08, + NULL AS s09, + NULL AS s10, + NULL AS s11, + FALSE AS s12, + TRUE AS s13, + FALSE AS s14, + FALSE AS s15, + TRUE AS s16, + TRUE AS s17, + TRUE AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + FALSE AS s24, + TRUE AS s25, + TRUE AS s26, + FALSE AS s27, + TRUE AS s28, + FALSE AS s29, + 8 AS s30, + 'alphabet' AS s31, + 'SOUP' AS s32, + TRUE AS s33, + FALSE AS s34, + FALSE AS s35, + TRUE AS s36, + 3.0 AS s37 +FROM (VALUES + (NULL)) AS _q_0 diff --git a/tests/test_sql_refsols/simplification_3_ansi.sql b/tests/test_sql_refsols/simplification_3_ansi.sql new file mode 100644 index 000000000..3b49cd41b --- /dev/null +++ b/tests/test_sql_refsols/simplification_3_ansi.sql @@ -0,0 +1,12 @@ +SELECT + TRUE AS s00, + TRUE AS s01, + FALSE AS s02, + FALSE AS s03, + FALSE AS s04, + FALSE AS s05, + COUNT(*) >= 3 AS s06, + FALSE AS s07, + COUNT(*) <= 6 AS s08, + FALSE AS s09 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_3_sqlite.sql b/tests/test_sql_refsols/simplification_3_sqlite.sql new file mode 100644 index 000000000..3b49cd41b --- /dev/null +++ b/tests/test_sql_refsols/simplification_3_sqlite.sql @@ -0,0 +1,12 @@ +SELECT + TRUE AS s00, + TRUE AS s01, + FALSE AS s02, + FALSE AS s03, + FALSE AS s04, + FALSE AS s05, + COUNT(*) >= 3 AS s06, + FALSE AS s07, + COUNT(*) <= 6 AS s08, + FALSE AS s09 +FROM main.sbcustomer From 3d9167e76a0f74a51c01b75dc834ef2bd0fa9f74 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 18:09:20 -0400 Subject: [PATCH 062/143] [RUN CI] From 3d17cad4793c21abb7c2cb6724c0632b4b56f1be Mon Sep 17 00:00:00 2001 From: knassre-bodo <105652923+knassre-bodo@users.noreply.github.com> Date: Tue, 22 Jul 2025 01:11:14 -0400 Subject: [PATCH 063/143] Update 
pydough/conversion/projection_pullup.py Co-authored-by: Hadia Ahmed --- pydough/conversion/projection_pullup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index a2c891a1b..55931e4d3 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -181,7 +181,7 @@ def pull_project_helper( ) # Iterate through the columns of the project to see which ones can be - # pulled up into the parent, dding them to a substitutions mapping that + # pulled up into the parent, adding them to a substitutions mapping that # will be used to apply the transformations. substitutions: dict[RelationalExpression, RelationalExpression] = {} for name, expr in project.columns.items(): From df2e4018553eb141f9c00b37aa6dd5bdab687e41 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 22 Jul 2025 01:30:49 -0400 Subject: [PATCH 064/143] Final revisions/documentation [RUN CI] --- pydough/conversion/merge_projects.py | 22 +++++++------- pydough/conversion/projection_pullup.py | 19 +++++++----- pydough/conversion/relational_converter.py | 34 +++++++++++++++------- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/pydough/conversion/merge_projects.py b/pydough/conversion/merge_projects.py index 8c400cb86..a310aefef 100644 --- a/pydough/conversion/merge_projects.py +++ b/pydough/conversion/merge_projects.py @@ -197,18 +197,17 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: if isinstance(node, RelationalRoot): # Replace all column references in the root's columns with # the expressions from the child projection. 
- for idx, (name, expr) in enumerate(node.ordered_columns): - new_expr = transpose_expression(expr, child_project.columns) - node.columns[name] = new_expr - node.ordered_columns[idx] = (name, new_expr) + node._ordered_columns = [ + (name, transpose_expression(expr, node.input.columns)) + for name, expr in node.ordered_columns + ] + node._columns = dict(node.ordered_columns) # Do the same with the sort expressions. for idx, sort_info in enumerate(node.orderings): new_expr = transpose_expression( sort_info.expr, child_project.columns ) - node.orderings[idx] = ExpressionSortInfo( - new_expr, sort_info.ascending, sort_info.nulls_first - ) + node.orderings[idx].expr = new_expr # Delete the child projection from the tree, replacing it # with its input. node._input = child_project.input @@ -262,10 +261,11 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: # If the orderings are the same, pull in the limit into the root. # Replace all column references in the root's columns with # the expressions from the child projection. - for idx, (name, expr) in enumerate(node.ordered_columns): - new_expr = transpose_expression(expr, node.input.columns) - node.columns[name] = new_expr - node.ordered_columns[idx] = (name, new_expr) + node._ordered_columns = [ + (name, transpose_expression(expr, node.input.columns)) + for name, expr in node.ordered_columns + ] + node._columns = dict(node.ordered_columns) node._orderings = new_orderings node._limit = node.input.limit # Delete the child projection from the tree, replacing it diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 55931e4d3..0e4065fd1 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -64,7 +64,8 @@ def widen_columns( } # Pull all the columns from each input to the node into the node's output - # columns if they are not already in the node's output columns. 
+ # columns if they are not already in the node's output columns. Make sure + # not to include no-op mappings. for input_idx in range(len(node.inputs)): input_alias: str | None = node.default_input_aliases[input_idx] input_node: RelationalNode = node.inputs[input_idx] @@ -87,12 +88,13 @@ def widen_columns( new_ref: ColumnReference = ColumnReference(new_name, expr.data_type) node.columns[new_name] = ref_expr existing_vals[expr] = ref_expr - substitutions[ref_expr] = new_ref - else: + if ref_expr != new_ref: + substitutions[ref_expr] = new_ref + elif ref_expr != existing_vals[expr]: substitutions[ref_expr] = existing_vals[expr] - # Return the substitution mapping, without any no-op substitutions - return {k: v for k, v in substitutions.items() if k != v} + # Return the substitution mapping + return substitutions def pull_non_columns(node: Join | Filter | Limit) -> RelationalNode: @@ -181,7 +183,7 @@ def pull_project_helper( ) # Iterate through the columns of the project to see which ones can be - # pulled up into the parent, adding them to a substitutions mapping that + # pulled up into the parent, dding them to a substitutions mapping that # will be used to apply the transformations. substitutions: dict[RelationalExpression, RelationalExpression] = {} for name, expr in project.columns.items(): @@ -205,7 +207,8 @@ def pull_project_into_join(node: Join, input_index: int) -> None: Args: `node`: The Join node to pull the Project columns into. `input_index`: The index of the input to the Join node that should have - its columns pulled up, if it is a project node. + its columns pulled up, if it is a project node. This is assumed to be + either 0 (for the LHS) or 1 (for the RHS). """ # Skip if the input at the specified input is not a Project node. @@ -480,7 +483,7 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: possible. This transformation is done in-place. Args: - `node`: The Filter node to pull the Project columns into. 
+ `node`: The Aggregate node to pull the Project columns into. """ if not isinstance(node.input, Project): return node diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index f66debc38..93303b7d8 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1417,7 +1417,7 @@ def optimize_relational_tree( Returns: The optimized relational root. """ - # Step 1: push filters down as far as possible + # Step 1: push filters down as far as possible. root._input = push_filters(root.input, set()) # Step 2: merge adjacent projections, unless it would result in excessive @@ -1432,10 +1432,14 @@ def optimize_relational_tree( # operating on already unique data. root = remove_redundant_aggs(root) - # Step 5: re-run projection merging. + # Step 5: re-run projection merging since the removal of redundant + # aggregations may have created redundant projections that can be deleted. root = confirm_root(merge_projects(root)) - # Step 6: re-run column pruning. + # Step 6: re-run column pruning after the various steps, which may have + # rendered more columns unused. This is done befre the next step to remove + # as many column names as possible so the column bubbling step can try to + # use nicer names without worrying about collisions. root = ColumnPruner().prune_unused_columns(root) # Step 7: bubble up names from the leaf nodes to further encourage simpler @@ -1443,21 +1447,31 @@ def optimize_relational_tree( # possible. root = bubble_column_names(root) - # Step 8: run projection pullup followed by column pruning 2x. + # Step 8: the following pipeline twice: + # A: projection pullup + # B: filter pushdown + # C: column pruning + # This is done because pullup will create more opportunities for filter + # pushdown, and the two together will create more opportunities for + # column pruning, the latter of which will unlock more opportunities for + # pullup and pushdown and so on. 
for _ in range(2): root = confirm_root(pullup_projections(root)) + root._input = push_filters(root.input, set()) root = ColumnPruner().prune_unused_columns(root) - # Step 9: re-run filter pushdown - root._input = push_filters(root.input, set()) - - # Step 10: re-run projection merging, without pushing into joins. + # Step 9: re-run projection merging, without pushing into joins. This + # will allow some redundant projections created by pullup to be removed + # entirely. root = confirm_root(merge_projects(root, push_into_joins=False)) - # Step 11: re-run column bubbling + # Step 10: re-run column bubbling to further simplify the final names of + # columns in the output now that more columns have been pruned, and delete + # any new duplicate columns that were created during the pullup step. root = bubble_column_names(root) - # Step 12: re-run column pruning. + # Step 11: re-run column pruning one last time to remove any columns that + # are no longer used after the final round of transformations. 
root = ColumnPruner().prune_unused_columns(root) return root From ce7e035a3f1a48045b40cb20567fbce37b288057 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 23 Jul 2025 11:06:16 -0400 Subject: [PATCH 065/143] Completed refactor of how simplification predicates work to use a PredicateSet object --- .../conversion/relational_simplification.py | 455 ++++++++++++------ tests/test_plan_refsols/simplification_1.txt | 2 +- .../simplification_1_ansi.sql | 2 +- .../simplification_1_sqlite.sql | 2 +- 4 files changed, 323 insertions(+), 138 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 6d43d4e0e..341ff9a0d 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -5,7 +5,7 @@ __all__ = ["simplify_expressions"] -from enum import Enum +from dataclasses import dataclass import pydough.pydough_operators as pydop from pydough.relational import ( @@ -31,15 +31,81 @@ ) -class LogicalPredicate(Enum): +@dataclass +class PredicateSet: """ - Enum representing logical predicates that can be inferred about relational - expressions. + A set of logical predicates that can be inferred about relational + expressions and used to simplify other expressions. """ - NOT_NULL = "NOT_NULL" - NOT_NEGATIVE = "NOT_NEGATIVE" - POSITIVE = "POSITIVE" + not_null: bool = False + """ + Whether the expression is guaranteed to not be null. + """ + + not_negative: bool = False + """ + Whether the expression is guaranteed to not be negative. + """ + + positive: bool = False + """ + Whether the expression is guaranteed to be positive. + """ + + def __or__(self, other: "PredicateSet") -> "PredicateSet": + """ + Combines two predicate sets using a logical OR operation. 
+ """ + return PredicateSet( + not_null=self.not_null or other.not_null, + not_negative=self.not_negative or other.not_negative, + positive=self.positive or other.positive, + ) + + def __and__(self, other: "PredicateSet") -> "PredicateSet": + """ + Combines two predicate sets using a logical AND operation. + """ + return PredicateSet( + not_null=self.not_null and other.not_null, + not_negative=self.not_negative and other.not_negative, + positive=self.positive and other.positive, + ) + + def __sub__(self, other: "PredicateSet") -> "PredicateSet": + """ + Subtracts one predicate set from another. + """ + return PredicateSet( + not_null=self.not_null and not other.not_null, + not_negative=self.not_negative and not other.not_negative, + positive=self.positive and not other.positive, + ) + + @staticmethod + def union(predicates: list["PredicateSet"]) -> "PredicateSet": + """ + Computes the union of a list of predicate sets. + """ + result: PredicateSet = PredicateSet() + for pred in predicates[1:]: + result = result | pred + return result + + @staticmethod + def intersect(predicates: list["PredicateSet"]) -> "PredicateSet": + """ + Computes the intersection of a list of predicate sets. 
+ """ + result: PredicateSet = PredicateSet() + if len(predicates) == 0: + return result + else: + result |= predicates[0] + for pred in predicates[1:]: + result = result & pred + return result NULL_PROPAGATING_OPS: set[pydop.PyDoughOperator] = { @@ -93,34 +159,50 @@ class LogicalPredicate(Enum): def simplify_function_call( expr: CallExpression, - arg_predicates: list[set[LogicalPredicate]], + arg_predicates: list[PredicateSet], no_group_aggregate: bool, -) -> tuple[RelationalExpression, set[LogicalPredicate]]: +) -> tuple[RelationalExpression, PredicateSet]: """ TODO """ output_expr: RelationalExpression = expr - output_predicates: set[LogicalPredicate] = set() + output_predicates: PredicateSet = PredicateSet() + union_set: PredicateSet = PredicateSet.union(arg_predicates) + intersect_set: PredicateSet = PredicateSet.intersect(arg_predicates) + + # If the call has null propagating rules, all of hte arguments are non-null, + # the output is guaranteed to be non-null. if expr.op in NULL_PROPAGATING_OPS: - if all(LogicalPredicate.NOT_NULL in preds for preds in arg_predicates): - output_predicates.add(LogicalPredicate.NOT_NULL) + if intersect_set.not_null: + output_predicates.not_null = True + match expr.op: case pydop.COUNT | pydop.NDISTINCT: - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + # COUNT(n), COUNT(*), and NDISTINCT(n) are guaranteed to be non-null + # and non-negative. + output_predicates.not_null = True + output_predicates.not_negative = True + + # The output if COUNT(*) is positive if unless doing a no-groupby + # aggregation. Same goes for calling COUNT or NDISTINCT ona non-null + # column. 
if not no_group_aggregate: - if ( - len(expr.inputs) == 0 - or LogicalPredicate.NOT_NULL in arg_predicates[0] - ): - output_predicates.add(LogicalPredicate.POSITIVE) + if len(expr.inputs) == 0 or arg_predicates[0].not_null: + output_predicates.positive = True + + # COUNT(x) where x is non-null can be rewritten as COUNT(*), which + # has the same positive rule as before. elif ( expr.op == pydop.COUNT and len(expr.inputs) == 1 - and LogicalPredicate.NOT_NULL in arg_predicates[0] + and arg_predicates[0].not_null ): - output_predicates.add(LogicalPredicate.POSITIVE) + if not no_group_aggregate: + output_predicates.positive = True output_expr = CallExpression(pydop.COUNT, expr.data_type, []) + + # All of these operators are non-null aor non-negative if their first + # argument is. case ( pydop.SUM | pydop.AVG @@ -130,53 +212,79 @@ def simplify_function_call( | pydop.MEDIAN | pydop.QUANTILE ): - for predicate in [ - LogicalPredicate.NOT_NEGATIVE, - LogicalPredicate.POSITIVE, - ]: - if predicate in arg_predicates[0]: - output_predicates.add(predicate) + output_predicates |= arg_predicates[0] & PredicateSet( + not_null=True, not_negative=True + ) + + # The result of addition is non-negative or positive if all the + # operands are. It is also positive if all the operands are non-negative + # and at least one of them is positive. + case pydop.ADD: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True + ) + if intersect_set.not_negative and union_set.positive: + output_predicates.positive = True + + # The result of multiplication is non-negative or positive if all the + # operands are. + case pydop.MUL: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True + ) + + # The result of division is non-negative or positive if all the + # operands are, and is also non-null if both operands are non-null and + # the second operand is positive. 
+ case pydop.DIV: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True + ) if ( - LogicalPredicate.NOT_NULL in arg_predicates[0] - and not no_group_aggregate + arg_predicates[0].not_null + and arg_predicates[1].not_null + and arg_predicates[1].positive ): - output_predicates.add(LogicalPredicate.NOT_NULL) - case pydop.ADD | pydop.MUL | pydop.DIV: - for predicate in [LogicalPredicate.NOT_NEGATIVE, LogicalPredicate.POSITIVE]: - if all(predicate in preds for preds in arg_predicates): - output_predicates.add(predicate) - if expr.op == pydop.DIV: - if ( - LogicalPredicate.NOT_NULL in arg_predicates[0] - and LogicalPredicate.NOT_NULL in arg_predicates[1] - and LogicalPredicate.POSITIVE in arg_predicates[1] - ): - output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.not_null = True + case pydop.DEFAULT_TO: + # DEFAULT_TO(None, x) -> x if ( isinstance(expr.inputs[0], LiteralExpression) and expr.inputs[0].value is None ): - output_expr = expr.inputs[1] - output_predicates = arg_predicates[1] - if LogicalPredicate.NOT_NULL in arg_predicates[0]: + if len(expr.inputs) == 2: + output_expr = expr.inputs[1] + output_predicates = arg_predicates[1] + else: + output_expr = CallExpression( + pydop.DEFAULT_TO, expr.data_type, expr.inputs[1:] + ) + output_predicates |= PredicateSet.intersect(arg_predicates[1:]) + + # DEFAULT_TO(x, y) -> x if x is non-null. + elif arg_predicates[0].not_null: output_expr = expr.inputs[0] - output_predicates = arg_predicates[0] + output_predicates |= arg_predicates[0] + + # Otherwise, it is non-null if any of the arguments are non-null, + # and gains any predicates that all the arguments have in common. 
else: - if any(LogicalPredicate.NOT_NULL in preds for preds in arg_predicates): - output_predicates.add(LogicalPredicate.NOT_NULL) - for pred in arg_predicates[0]: - if all(pred in preds for preds in arg_predicates): - output_predicates.add(pred) + if union_set.not_null: + output_predicates.not_null = True + output_predicates |= intersect_set + + # ABS(x) -> x if x is positive or non-negative. At hte very least, we + # know it is always non-negative. case pydop.ABS: - if ( - LogicalPredicate.POSITIVE in arg_predicates[0] - or LogicalPredicate.NOT_NEGATIVE in arg_predicates[0] - ): + if arg_predicates[0].not_negative or arg_predicates[0].positive: output_expr = expr.inputs[0] - output_predicates = arg_predicates[0] + output_predicates |= arg_predicates[0] else: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + + # LENGTH(x) can be constant folded if x is a string literal. Otherwise, + # we know it is non-negative. case pydop.LENGTH: if isinstance(expr.inputs[0], LiteralExpression) and isinstance( expr.inputs[0].value, str @@ -184,8 +292,12 @@ def simplify_function_call( str_len: int = len(expr.inputs[0].value) output_expr = LiteralExpression(str_len, expr.data_type) if str_len > 0: - output_predicates.add(LogicalPredicate.POSITIVE) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.positive = True + output_predicates.not_negative = True + + # LOWER, UPPER, STARTSWITH, ENDSWITH, and CONTAINS can be constant + # folded if the inputs are string literals. The boolean-returning + # operators are always non-negative. 
case pydop.LOWER: if isinstance(expr.inputs[0], LiteralExpression) and isinstance( expr.inputs[0].value, str @@ -211,7 +323,10 @@ def simplify_function_call( expr.inputs[0].value.startswith(expr.inputs[1].value), expr.data_type, ) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.positive |= expr.inputs[0].value.startswith( + expr.inputs[1].value + ) + output_predicates.not_negative = True case pydop.ENDSWITH: if ( isinstance(expr.inputs[0], LiteralExpression) @@ -222,7 +337,10 @@ def simplify_function_call( output_expr = LiteralExpression( expr.inputs[0].value.endswith(expr.inputs[1].value), expr.data_type ) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.positive |= expr.inputs[0].value.endswith( + expr.inputs[1].value + ) + output_predicates.not_negative = True case pydop.CONTAINS: if ( isinstance(expr.inputs[0], LiteralExpression) @@ -233,7 +351,13 @@ def simplify_function_call( output_expr = LiteralExpression( expr.inputs[1].value in expr.inputs[0].value, expr.data_type ) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.positive |= ( + expr.inputs[1].value in expr.inputs[0].value + ) + output_predicates.not_negative = True + + # SQRT(x) can be constant folded if x is a literal and non-negative. + # Otherwise, it is non-negative, and positive if x is positive. 
case pydop.SQRT: if ( isinstance(expr.inputs[0], LiteralExpression) @@ -242,9 +366,10 @@ def simplify_function_call( ): sqrt_value: float = expr.inputs[0].value ** 0.5 output_expr = LiteralExpression(sqrt_value, expr.data_type) - if sqrt_value > 0: - output_predicates.add(LogicalPredicate.POSITIVE) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if arg_predicates[0].positive: + output_predicates.positive = True + output_predicates.not_negative = True + case pydop.MONOTONIC: v0: int | float | None = None v1: int | float | None = None @@ -262,11 +387,17 @@ def simplify_function_call( expr.inputs[2].value, (int, float) ): v2 = expr.inputs[2].value + + # MONOTONIC(x, y, z), where x/y/z are all literals + # -> True if x <= y <= z, False otherwise if v0 is not None and v1 is not None and v2 is not None: monotonic_result = (v0 <= v1) and (v1 <= v2) output_expr = LiteralExpression(monotonic_result, expr.data_type) if monotonic_result: - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.positive = True + + # MONOTONIC(x, y, z), where x/y are literals + # -> if x <= y, then y <= z, otherwise False elif v0 is not None and v1 is not None: if v0 <= v1: output_expr = CallExpression( @@ -274,6 +405,9 @@ def simplify_function_call( ) else: output_expr = LiteralExpression(False, expr.data_type) + + # MONOTONIC(x, y, z), where y/z are literals + # -> if y <= z, then x <= y, otherwise False elif v1 is not None and v2 is not None: if v1 <= v2: output_expr = CallExpression( @@ -281,9 +415,14 @@ def simplify_function_call( ) else: output_expr = LiteralExpression(False, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + + # XOR and LIKE are always non-negative case pydop.BXR | pydop.LIKE: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + + # X & Y is False if any of the arguments are False-y literals, and True + # if all of the arguments are Truth-y 
literals. case pydop.BAN: if any( isinstance(arg, LiteralExpression) and arg.value in [0, False, None] @@ -295,7 +434,10 @@ def simplify_function_call( for arg in expr.inputs ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + + # X | Y is True if any of the arguments are Truth-y literals, and False + # if all of the arguments are False-y literals. case pydop.BOR: if any( isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] @@ -307,25 +449,42 @@ def simplify_function_call( for arg in expr.inputs ): output_expr = LiteralExpression(False, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: match (expr.inputs[0], expr.op, expr.inputs[1]): - case (_, pydop.GRT, LiteralExpression()) if ( - expr.inputs[1].value == 0 - and LogicalPredicate.POSITIVE in arg_predicates[0] + # x > y is True if x is positive and y is a literal that is + # zero or negative. The same goes for x >= y. + case (_, pydop.GRT, LiteralExpression()) | ( + _, + pydop.GEQ, + LiteralExpression(), + ) if ( + isinstance(expr.inputs[1].value, (int, float, bool)) + and expr.inputs[1].value <= 0 + and arg_predicates[0].not_null + and arg_predicates[0].positive ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + + # x >= y is True if x is non-negative and y is a literal that is + # zero or negative. 
case (_, pydop.GEQ, LiteralExpression()) if ( - expr.inputs[1].value == 0 - and LogicalPredicate.NOT_NEGATIVE in arg_predicates[0] + isinstance(expr.inputs[1].value, (int, float, bool)) + and expr.inputs[1].value <= 0 + and arg_predicates[0].not_null + and arg_predicates[0].not_negative ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + + # The rest of the case of x CMP y can be constant folded if both + # x and y are literals. case (LiteralExpression(), _, LiteralExpression()): match ( expr.inputs[0].value, @@ -357,88 +516,115 @@ def simplify_function_call( case _: pass - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + + # PRESENT(x) is True if x is non-null. case pydop.PRESENT: - if LogicalPredicate.NOT_NULL in arg_predicates[0]: + if arg_predicates[0].not_null: output_expr = LiteralExpression(True, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.positive = True + output_predicates.not_null = True + output_predicates.not_negative = True + + # ABSENT(x) is True if x is null. case pydop.ABSENT: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if ( + isinstance(expr.inputs[0], LiteralExpression) + and expr.inputs[0].value is None + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.positive = True + output_predicates.not_null = True + output_predicates.not_negative = True + + # IFF(True, y, z) -> y (same if the first argument is guaranteed to be + # positive & non-null). + # IFF(False, y, z) -> z + # Otherwise, it inherits the intersection of the predicates of y and z. 
case pydop.IFF: if isinstance(expr.inputs[0], LiteralExpression): if bool(expr.inputs[0].value): output_expr = expr.inputs[1] - output_predicates = arg_predicates[1] + output_predicates |= arg_predicates[1] else: output_expr = expr.inputs[2] - output_predicates = arg_predicates[2] - elif ( - LogicalPredicate.POSITIVE in arg_predicates[0] - and LogicalPredicate.NOT_NULL in arg_predicates[0] - ): + output_predicates |= arg_predicates[2] + elif arg_predicates[0].not_null and arg_predicates[0].positive: output_expr = expr.inputs[1] - output_predicates = arg_predicates[1] + output_predicates |= arg_predicates[1] else: - output_predicates = arg_predicates[1] & arg_predicates[2] + output_predicates |= arg_predicates[1] & arg_predicates[2] + + # KEEP_IF(x, True) -> x + # KEEP_IF(x, False) -> None case pydop.KEEP_IF: if isinstance(expr.inputs[1], LiteralExpression): if bool(expr.inputs[1].value): output_expr = expr.inputs[0] - output_predicates = arg_predicates[0] + output_predicates |= arg_predicates[0] else: output_expr = LiteralExpression(None, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - elif ( - LogicalPredicate.POSITIVE in arg_predicates[1] - and LogicalPredicate.NOT_NULL in arg_predicates[1] - ): + output_predicates.not_negative = True + elif arg_predicates[1].not_null and arg_predicates[1].positive: output_expr = expr.inputs[0] output_predicates = arg_predicates[0] - elif LogicalPredicate.NOT_NEGATIVE in arg_predicates[0]: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + else: + output_predicates |= arg_predicates[0] & PredicateSet( + not_null=True, not_negative=True + ) return output_expr, output_predicates def simplify_window_call( expr: WindowCallExpression, - arg_predicates: list[set[LogicalPredicate]], -) -> tuple[RelationalExpression, set[LogicalPredicate]]: + arg_predicates: list[PredicateSet], +) -> tuple[RelationalExpression, PredicateSet]: """ TODO """ - output_predicates: set[LogicalPredicate] = set() + 
output_predicates: PredicateSet = PredicateSet() no_frame: bool = not ( expr.kwargs.get("cumulative", False) or "frame" in expr.kwargs ) match expr.op: + # RANKING & PERCENTILE are always non-null, non-negative, and positive. case pydop.RANKING | pydop.PERCENTILE: - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + + # RELSUM and RELAVG retain the properties of their argument, but become + # nullable if there is a frame. case pydop.RELSUM | pydop.RELAVG: - if LogicalPredicate.NOT_NULL in arg_predicates[0] and no_frame: - output_predicates.add(LogicalPredicate.NOT_NULL) - if LogicalPredicate.NOT_NEGATIVE in arg_predicates[0]: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - if LogicalPredicate.POSITIVE in arg_predicates[0] and no_frame: - output_predicates.add(LogicalPredicate.POSITIVE) + if arg_predicates[0].not_null and no_frame: + output_predicates.not_null = True + if arg_predicates[0].not_negative: + output_predicates.not_negative = True + if arg_predicates[0].positive: + output_predicates.positive = True + + # RELSIZE is always non-negative, but is only non-null & positive if + # there is no frame. case pydop.RELSIZE: if no_frame: - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.POSITIVE) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_null = True + output_predicates.positive = True + output_predicates.not_negative = True + + # RELCOUNT is always non-negative, but it is only non-null if there is + # no frame, and positive if there is no frame and the first argument + # is non-null. 
case pydop.RELCOUNT: if no_frame: - output_predicates.add(LogicalPredicate.NOT_NULL) - if LogicalPredicate.NOT_NULL in arg_predicates[0]: - output_predicates.add(LogicalPredicate.POSITIVE) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_null = True + if arg_predicates[0].not_null: + output_predicates.positive = True + output_predicates.not_negative = True return expr, output_predicates -def infer_literal_predicates(expr: LiteralExpression) -> set[LogicalPredicate]: +def infer_literal_predicates(expr: LiteralExpression) -> PredicateSet: """ Infers logical predicates from a literal expression. @@ -448,22 +634,22 @@ def infer_literal_predicates(expr: LiteralExpression) -> set[LogicalPredicate]: Returns: A set of logical predicates inferred from the literal. """ - output_predicates: set[LogicalPredicate] = set() + output_predicates: PredicateSet = PredicateSet() if expr.value is not None: - output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.not_null = True if isinstance(expr.value, (int, float)): if expr.value >= 0: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True if expr.value > 0: - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.positive = True return output_predicates def run_simplification( expr: RelationalExpression, - input_predicates: dict[RelationalExpression, set[LogicalPredicate]], + input_predicates: dict[RelationalExpression, PredicateSet], no_group_aggregate: bool, -) -> tuple[RelationalExpression, set[LogicalPredicate]]: +) -> tuple[RelationalExpression, PredicateSet]: """ Runs the simplification on a single expression, applying any predicates inferred from the input nodes to aid the process and inferring any new @@ -484,15 +670,15 @@ def run_simplification( new_args: list[RelationalExpression] new_partitions: list[RelationalExpression] new_orders: list[ExpressionSortInfo] - arg_predicates: list[set[LogicalPredicate]] - output_predicates: 
set[LogicalPredicate] = set() + arg_predicates: list[PredicateSet] + output_predicates: PredicateSet = PredicateSet() requires_rewrite: bool = False if isinstance(expr, LiteralExpression): output_predicates = infer_literal_predicates(expr) if isinstance(expr, ColumnReference): - output_predicates = input_predicates.get(expr, set()) + output_predicates = input_predicates.get(expr, PredicateSet()) if isinstance(expr, CallExpression): new_args = [] @@ -552,7 +738,7 @@ def run_simplification( def simplify_expressions( node: RelationalNode, -) -> dict[RelationalExpression, set[LogicalPredicate]]: +) -> dict[RelationalExpression, PredicateSet]: """ The main recursive procedure done to perform expression simplification on a relational node and its descendants. The transformation is done in-place @@ -564,7 +750,7 @@ def simplify_expressions( The predicates inferred from the output columns of the node. """ # Recursively invoke the procedure on all inputs to the node. - input_predicates: dict[RelationalExpression, set[LogicalPredicate]] = {} + input_predicates: dict[RelationalExpression, PredicateSet] = {} for idx, input_node in enumerate(node.inputs): input_alias: str | None = node.default_input_aliases[idx] predicates = simplify_expressions(input_node) @@ -573,7 +759,7 @@ def simplify_expressions( # Transform the expressions of the current node in-place. 
ref_expr: RelationalExpression - output_predicates: dict[RelationalExpression, set[LogicalPredicate]] = {} + output_predicates: dict[RelationalExpression, PredicateSet] = {} match node: case ( Project() @@ -612,8 +798,7 @@ def simplify_expressions( isinstance(expr, ColumnReference) and expr.input_name != node.default_input_aliases[0] ): - preds.discard(LogicalPredicate.NOT_NULL) - preds.discard(LogicalPredicate.POSITIVE) + preds.not_null = False case Aggregate(): for name, expr in node.keys.items(): ref_expr = ColumnReference(name, expr.data_type) diff --git a/tests/test_plan_refsols/simplification_1.txt b/tests/test_plan_refsols/simplification_1.txt index 9a342e6c4..6089ea8c9 100644 --- a/tests/test_plan_refsols/simplification_1.txt +++ b/tests/test_plan_refsols/simplification_1.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('s00', 13:numeric), ('s01', 0:numeric), ('s02', n_rows), ('s03', n_rows + 5:numeric), ('s04', n_rows * 2:numeric), ('s05', n_rows / 8.0:numeric), ('s06', 10:numeric), ('s07', n_rows), ('s08', ABS(n_rows - 25:numeric)), ('s09', n_rows + 1:numeric), ('s10', n_rows - 3:numeric), ('s11', n_rows * -1:numeric), ('s12', n_rows / 2.5:numeric), ('s13', n_rows > 10:numeric), ('s14', n_rows >= 10:numeric), ('s15', n_rows == 20:numeric), ('s16', n_rows != 25:numeric), ('s17', n_rows < 25:numeric), ('s18', n_rows <= 25:numeric), ('s19', n_rows), ('s20', DEFAULT_TO(avg_expr_4, 0:numeric)), ('s21', True:bool), ('s22', True:bool), ('s23', True:bool)], orderings=[]) +ROOT(columns=[('s00', 13:numeric), ('s01', 0:numeric), ('s02', n_rows), ('s03', n_rows + 5:numeric), ('s04', n_rows * 2:numeric), ('s05', n_rows / 8.0:numeric), ('s06', 10:numeric), ('s07', n_rows), ('s08', ABS(n_rows - 25:numeric)), ('s09', n_rows + 1:numeric), ('s10', n_rows - 3:numeric), ('s11', n_rows * -1:numeric), ('s12', n_rows / 2.5:numeric), ('s13', n_rows > 10:numeric), ('s14', n_rows >= 10:numeric), ('s15', n_rows == 20:numeric), ('s16', n_rows != 25:numeric), ('s17', n_rows < 25:numeric), ('s18', 
n_rows <= 25:numeric), ('s19', n_rows), ('s20', avg_expr_4), ('s21', True:bool), ('s22', True:bool), ('s23', True:bool)], orderings=[]) AGGREGATE(keys={}, aggregations={'avg_expr_4': AVG(DEFAULT_TO(LENGTH(sbCustName), 0:numeric)), 'n_rows': COUNT()}) SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) diff --git a/tests/test_sql_refsols/simplification_1_ansi.sql b/tests/test_sql_refsols/simplification_1_ansi.sql index a07916fd6..9e7a4ecbe 100644 --- a/tests/test_sql_refsols/simplification_1_ansi.sql +++ b/tests/test_sql_refsols/simplification_1_ansi.sql @@ -19,7 +19,7 @@ SELECT COUNT(*) < 25 AS s17, COUNT(*) <= 25 AS s18, COUNT(*) AS s19, - COALESCE(AVG(COALESCE(LENGTH(sbcustname), 0)), 0) AS s20, + AVG(COALESCE(LENGTH(sbcustname), 0)) AS s20, TRUE AS s21, TRUE AS s22, TRUE AS s23 diff --git a/tests/test_sql_refsols/simplification_1_sqlite.sql b/tests/test_sql_refsols/simplification_1_sqlite.sql index 39f87c5f9..77364717e 100644 --- a/tests/test_sql_refsols/simplification_1_sqlite.sql +++ b/tests/test_sql_refsols/simplification_1_sqlite.sql @@ -19,7 +19,7 @@ SELECT COUNT(*) < 25 AS s17, COUNT(*) <= 25 AS s18, COUNT(*) AS s19, - COALESCE(AVG(COALESCE(LENGTH(sbcustname), 0)), 0) AS s20, + AVG(COALESCE(LENGTH(sbcustname), 0)) AS s20, TRUE AS s21, TRUE AS s22, TRUE AS s23 From e6c9fbe8c79662c11e4739d5920f06c79adb5ad5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 23 Jul 2025 15:41:07 -0400 Subject: [PATCH 066/143] Refactoring to use shuttles & visitors for simplification --- .../conversion/relational_simplification.py | 1292 +++++++++-------- pydough/relational/__init__.py | 2 + .../relational_expressions/__init__.py | 2 + .../relational_nodes/relational_visitor.py | 3 +- 4 files changed, 692 insertions(+), 607 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 341ff9a0d..dc1f9e1b1 100644 --- a/pydough/conversion/relational_simplification.py +++ 
b/pydough/conversion/relational_simplification.py @@ -12,8 +12,8 @@ Aggregate, CallExpression, ColumnReference, + CorrelatedReference, EmptySingleton, - ExpressionSortInfo, Filter, Join, JoinType, @@ -21,8 +21,10 @@ LiteralExpression, Project, RelationalExpression, + RelationalExpressionShuttle, RelationalNode, RelationalRoot, + RelationalVisitor, Scan, WindowCallExpression, ) @@ -157,666 +159,744 @@ def intersect(predicates: list["PredicateSet"]) -> "PredicateSet": } -def simplify_function_call( - expr: CallExpression, - arg_predicates: list[PredicateSet], - no_group_aggregate: bool, -) -> tuple[RelationalExpression, PredicateSet]: +class SimplificationShuttle(RelationalExpressionShuttle): """ TODO """ - output_expr: RelationalExpression = expr - output_predicates: PredicateSet = PredicateSet() - union_set: PredicateSet = PredicateSet.union(arg_predicates) - intersect_set: PredicateSet = PredicateSet.intersect(arg_predicates) - - # If the call has null propagating rules, all of hte arguments are non-null, - # the output is guaranteed to be non-null. - if expr.op in NULL_PROPAGATING_OPS: - if intersect_set.not_null: - output_predicates.not_null = True - match expr.op: - case pydop.COUNT | pydop.NDISTINCT: - # COUNT(n), COUNT(*), and NDISTINCT(n) are guaranteed to be non-null - # and non-negative. - output_predicates.not_null = True - output_predicates.not_negative = True + def __init__(self): + self.stack: list[PredicateSet] = [] + self._input_predicates: dict[RelationalExpression, PredicateSet] = {} + self._no_group_aggregate: bool = False - # The output if COUNT(*) is positive if unless doing a no-groupby - # aggregation. Same goes for calling COUNT or NDISTINCT ona non-null - # column. - if not no_group_aggregate: - if len(expr.inputs) == 0 or arg_predicates[0].not_null: - output_predicates.positive = True + @property + def input_predicates(self) -> dict[RelationalExpression, PredicateSet]: + """ + Returns the input predicates that were passed to the shuttle. 
+ """ + return self._input_predicates - # COUNT(x) where x is non-null can be rewritten as COUNT(*), which - # has the same positive rule as before. - elif ( - expr.op == pydop.COUNT - and len(expr.inputs) == 1 - and arg_predicates[0].not_null - ): - if not no_group_aggregate: - output_predicates.positive = True - output_expr = CallExpression(pydop.COUNT, expr.data_type, []) - - # All of these operators are non-null aor non-negative if their first - # argument is. - case ( - pydop.SUM - | pydop.AVG - | pydop.MIN - | pydop.MAX - | pydop.ANYTHING - | pydop.MEDIAN - | pydop.QUANTILE - ): - output_predicates |= arg_predicates[0] & PredicateSet( - not_null=True, not_negative=True - ) - - # The result of addition is non-negative or positive if all the - # operands are. It is also positive if all the operands are non-negative - # and at least one of them is positive. - case pydop.ADD: - output_predicates |= intersect_set & PredicateSet( - not_negative=True, positive=True - ) - if intersect_set.not_negative and union_set.positive: - output_predicates.positive = True - - # The result of multiplication is non-negative or positive if all the - # operands are. - case pydop.MUL: - output_predicates |= intersect_set & PredicateSet( - not_negative=True, positive=True - ) - - # The result of division is non-negative or positive if all the - # operands are, and is also non-null if both operands are non-null and - # the second operand is positive. - case pydop.DIV: - output_predicates |= intersect_set & PredicateSet( - not_negative=True, positive=True - ) - if ( - arg_predicates[0].not_null - and arg_predicates[1].not_null - and arg_predicates[1].positive - ): - output_predicates.not_null = True + @input_predicates.setter + def input_predicates(self, value: dict[RelationalExpression, PredicateSet]) -> None: + """ + Sets the input predicates for the shuttle. 
+ """ + self._input_predicates = value - case pydop.DEFAULT_TO: - # DEFAULT_TO(None, x) -> x - if ( - isinstance(expr.inputs[0], LiteralExpression) - and expr.inputs[0].value is None - ): - if len(expr.inputs) == 2: - output_expr = expr.inputs[1] - output_predicates = arg_predicates[1] - else: - output_expr = CallExpression( - pydop.DEFAULT_TO, expr.data_type, expr.inputs[1:] - ) - output_predicates |= PredicateSet.intersect(arg_predicates[1:]) + @property + def no_group_aggregate(self) -> bool: + """ + Returns whether the shuttle currently a handling no-group-aggregate. + """ + return self._no_group_aggregate - # DEFAULT_TO(x, y) -> x if x is non-null. - elif arg_predicates[0].not_null: - output_expr = expr.inputs[0] - output_predicates |= arg_predicates[0] + @no_group_aggregate.setter + def no_group_aggregate(self, value: bool) -> None: + """ + Sets whether the shuttle is handling a no-group-aggregate. + """ + self._no_group_aggregate = value - # Otherwise, it is non-null if any of the arguments are non-null, - # and gains any predicates that all the arguments have in common. - else: - if union_set.not_null: - output_predicates.not_null = True - output_predicates |= intersect_set - - # ABS(x) -> x if x is positive or non-negative. At hte very least, we - # know it is always non-negative. 
- case pydop.ABS: - if arg_predicates[0].not_negative or arg_predicates[0].positive: - output_expr = expr.inputs[0] - output_predicates |= arg_predicates[0] - else: + def reset(self) -> None: + self.stack = [] + + def visit_literal_expression( + self, literal_expression: LiteralExpression + ) -> RelationalExpression: + output_predicates: PredicateSet = PredicateSet() + if literal_expression.value is not None: + output_predicates.not_null = True + if isinstance(literal_expression.value, (int, float)): + if literal_expression.value >= 0: + output_predicates.not_negative = True + if literal_expression.value > 0: + output_predicates.positive = True + self.stack.append(output_predicates) + return literal_expression + + def visit_column_reference( + self, column_reference: ColumnReference + ) -> RelationalExpression: + self.stack.append(self.input_predicates.get(column_reference, PredicateSet())) + return column_reference + + def visit_correlated_reference( + self, correlated_reference: CorrelatedReference + ) -> RelationalExpression: + self.stack.append(PredicateSet()) + return correlated_reference + + def visit_call_expression( + self, call_expression: CallExpression + ) -> RelationalExpression: + new_call = super().visit_call_expression(call_expression) + assert isinstance(new_call, CallExpression) + arg_predicates: list[PredicateSet] = [ + self.stack.pop() for _ in range(len(new_call.inputs)) + ] + arg_predicates.reverse() + return self.simplify_function_call( + new_call, arg_predicates, self.no_group_aggregate + ) + + def visit_window_expression( + self, window_expression: WindowCallExpression + ) -> RelationalExpression: + new_window = super().visit_window_expression(window_expression) + assert isinstance(new_window, WindowCallExpression) + for _ in range(len(new_window.order_inputs)): + self.stack.pop() + for _ in range(len(new_window.partition_inputs)): + self.stack.pop() + arg_predicates: list[PredicateSet] = [ + self.stack.pop() for _ in 
range(len(new_window.inputs)) + ] + arg_predicates.reverse() + return self.simplify_window_call(new_window, arg_predicates) + + def simplify_function_call( + self, + expr: CallExpression, + arg_predicates: list[PredicateSet], + no_group_aggregate: bool, + ) -> RelationalExpression: + """ + TODO + """ + output_expr: RelationalExpression = expr + output_predicates: PredicateSet = PredicateSet() + union_set: PredicateSet = PredicateSet.union(arg_predicates) + intersect_set: PredicateSet = PredicateSet.intersect(arg_predicates) + + # If the call has null propagating rules, all of hte arguments are non-null, + # the output is guaranteed to be non-null. + if expr.op in NULL_PROPAGATING_OPS: + if intersect_set.not_null: + output_predicates.not_null = True + + match expr.op: + case pydop.COUNT | pydop.NDISTINCT: + # COUNT(n), COUNT(*), and NDISTINCT(n) are guaranteed to be non-null + # and non-negative. + output_predicates.not_null = True output_predicates.not_negative = True - # LENGTH(x) can be constant folded if x is a string literal. Otherwise, - # we know it is non-negative. - case pydop.LENGTH: - if isinstance(expr.inputs[0], LiteralExpression) and isinstance( - expr.inputs[0].value, str - ): - str_len: int = len(expr.inputs[0].value) - output_expr = LiteralExpression(str_len, expr.data_type) - if str_len > 0: - output_predicates.positive = True - output_predicates.not_negative = True - - # LOWER, UPPER, STARTSWITH, ENDSWITH, and CONTAINS can be constant - # folded if the inputs are string literals. The boolean-returning - # operators are always non-negative. 
- case pydop.LOWER: - if isinstance(expr.inputs[0], LiteralExpression) and isinstance( - expr.inputs[0].value, str - ): - output_expr = LiteralExpression( - expr.inputs[0].value.lower(), expr.data_type - ) - case pydop.UPPER: - if isinstance(expr.inputs[0], LiteralExpression) and isinstance( - expr.inputs[0].value, str - ): - output_expr = LiteralExpression( - expr.inputs[0].value.upper(), expr.data_type - ) - case pydop.STARTSWITH: - if ( - isinstance(expr.inputs[0], LiteralExpression) - and isinstance(expr.inputs[0].value, str) - and isinstance(expr.inputs[1], LiteralExpression) - and isinstance(expr.inputs[1].value, str) - ): - output_expr = LiteralExpression( - expr.inputs[0].value.startswith(expr.inputs[1].value), - expr.data_type, - ) - output_predicates.positive |= expr.inputs[0].value.startswith( - expr.inputs[1].value - ) - output_predicates.not_negative = True - case pydop.ENDSWITH: - if ( - isinstance(expr.inputs[0], LiteralExpression) - and isinstance(expr.inputs[0].value, str) - and isinstance(expr.inputs[1], LiteralExpression) - and isinstance(expr.inputs[1].value, str) + # The output if COUNT(*) is positive if unless doing a no-groupby + # aggregation. Same goes for calling COUNT or NDISTINCT ona non-null + # column. + if not no_group_aggregate: + if len(expr.inputs) == 0 or arg_predicates[0].not_null: + output_predicates.positive = True + + # COUNT(x) where x is non-null can be rewritten as COUNT(*), which + # has the same positive rule as before. + elif ( + expr.op == pydop.COUNT + and len(expr.inputs) == 1 + and arg_predicates[0].not_null + ): + if not no_group_aggregate: + output_predicates.positive = True + output_expr = CallExpression(pydop.COUNT, expr.data_type, []) + + # All of these operators are non-null aor non-negative if their first + # argument is. 
+ case ( + pydop.SUM + | pydop.AVG + | pydop.MIN + | pydop.MAX + | pydop.ANYTHING + | pydop.MEDIAN + | pydop.QUANTILE ): - output_expr = LiteralExpression( - expr.inputs[0].value.endswith(expr.inputs[1].value), expr.data_type + output_predicates |= arg_predicates[0] & PredicateSet( + not_null=True, not_negative=True ) - output_predicates.positive |= expr.inputs[0].value.endswith( - expr.inputs[1].value + + # The result of addition is non-negative or positive if all the + # operands are. It is also positive if all the operands are non-negative + # and at least one of them is positive. + case pydop.ADD: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True ) - output_predicates.not_negative = True - case pydop.CONTAINS: - if ( - isinstance(expr.inputs[0], LiteralExpression) - and isinstance(expr.inputs[0].value, str) - and isinstance(expr.inputs[1], LiteralExpression) - and isinstance(expr.inputs[1].value, str) - ): - output_expr = LiteralExpression( - expr.inputs[1].value in expr.inputs[0].value, expr.data_type + if intersect_set.not_negative and union_set.positive: + output_predicates.positive = True + + # The result of multiplication is non-negative or positive if all the + # operands are. + case pydop.MUL: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True ) - output_predicates.positive |= ( - expr.inputs[1].value in expr.inputs[0].value + + # The result of division is non-negative or positive if all the + # operands are, and is also non-null if both operands are non-null and + # the second operand is positive. + case pydop.DIV: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True ) - output_predicates.not_negative = True - - # SQRT(x) can be constant folded if x is a literal and non-negative. - # Otherwise, it is non-negative, and positive if x is positive. 
- case pydop.SQRT: - if ( - isinstance(expr.inputs[0], LiteralExpression) - and isinstance(expr.inputs[0].value, (int, float)) - and expr.inputs[0].value >= 0 - ): - sqrt_value: float = expr.inputs[0].value ** 0.5 - output_expr = LiteralExpression(sqrt_value, expr.data_type) - if arg_predicates[0].positive: - output_predicates.positive = True - output_predicates.not_negative = True - - case pydop.MONOTONIC: - v0: int | float | None = None - v1: int | float | None = None - v2: int | float | None = None - monotonic_result: bool - if isinstance(expr.inputs[0], LiteralExpression) and isinstance( - expr.inputs[0].value, (int, float) - ): - v0 = expr.inputs[0].value - if isinstance(expr.inputs[1], LiteralExpression) and isinstance( - expr.inputs[1].value, (int, float) - ): - v1 = expr.inputs[1].value - if isinstance(expr.inputs[2], LiteralExpression) and isinstance( - expr.inputs[2].value, (int, float) - ): - v2 = expr.inputs[2].value - - # MONOTONIC(x, y, z), where x/y/z are all literals - # -> True if x <= y <= z, False otherwise - if v0 is not None and v1 is not None and v2 is not None: - monotonic_result = (v0 <= v1) and (v1 <= v2) - output_expr = LiteralExpression(monotonic_result, expr.data_type) - if monotonic_result: - output_predicates.positive = True + if ( + arg_predicates[0].not_null + and arg_predicates[1].not_null + and arg_predicates[1].positive + ): + output_predicates.not_null = True - # MONOTONIC(x, y, z), where x/y are literals - # -> if x <= y, then y <= z, otherwise False - elif v0 is not None and v1 is not None: - if v0 <= v1: - output_expr = CallExpression( - pydop.LEQ, expr.data_type, expr.inputs[1:] - ) + case pydop.DEFAULT_TO: + # DEFAULT_TO(None, x) -> x + if ( + isinstance(expr.inputs[0], LiteralExpression) + and expr.inputs[0].value is None + ): + if len(expr.inputs) == 2: + output_expr = expr.inputs[1] + output_predicates = arg_predicates[1] + else: + output_expr = CallExpression( + pydop.DEFAULT_TO, expr.data_type, expr.inputs[1:] + ) + 
output_predicates |= PredicateSet.intersect(arg_predicates[1:]) + + # DEFAULT_TO(x, y) -> x if x is non-null. + elif arg_predicates[0].not_null: + output_expr = expr.inputs[0] + output_predicates |= arg_predicates[0] + + # Otherwise, it is non-null if any of the arguments are non-null, + # and gains any predicates that all the arguments have in common. else: - output_expr = LiteralExpression(False, expr.data_type) + if union_set.not_null: + output_predicates.not_null = True + output_predicates |= intersect_set + + # ABS(x) -> x if x is positive or non-negative. At hte very least, we + # know it is always non-negative. + case pydop.ABS: + if arg_predicates[0].not_negative or arg_predicates[0].positive: + output_expr = expr.inputs[0] + output_predicates |= arg_predicates[0] + else: + output_predicates.not_negative = True + + # LENGTH(x) can be constant folded if x is a string literal. Otherwise, + # we know it is non-negative. + case pydop.LENGTH: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + str_len: int = len(expr.inputs[0].value) + output_expr = LiteralExpression(str_len, expr.data_type) + if str_len > 0: + output_predicates.positive = True + output_predicates.not_negative = True - # MONOTONIC(x, y, z), where y/z are literals - # -> if y <= z, then x <= y, otherwise False - elif v1 is not None and v2 is not None: - if v1 <= v2: - output_expr = CallExpression( - pydop.LEQ, expr.data_type, expr.inputs[:2] + # LOWER, UPPER, STARTSWITH, ENDSWITH, and CONTAINS can be constant + # folded if the inputs are string literals. The boolean-returning + # operators are always non-negative. 
+ case pydop.LOWER: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + output_expr = LiteralExpression( + expr.inputs[0].value.lower(), expr.data_type ) - else: + case pydop.UPPER: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + output_expr = LiteralExpression( + expr.inputs[0].value.upper(), expr.data_type + ) + case pydop.STARTSWITH: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[0].value.startswith(expr.inputs[1].value), + expr.data_type, + ) + output_predicates.positive |= expr.inputs[0].value.startswith( + expr.inputs[1].value + ) + output_predicates.not_negative = True + case pydop.ENDSWITH: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[0].value.endswith(expr.inputs[1].value), + expr.data_type, + ) + output_predicates.positive |= expr.inputs[0].value.endswith( + expr.inputs[1].value + ) + output_predicates.not_negative = True + case pydop.CONTAINS: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[1].value in expr.inputs[0].value, expr.data_type + ) + output_predicates.positive |= ( + expr.inputs[1].value in expr.inputs[0].value + ) + output_predicates.not_negative = True + + # SQRT(x) can be constant folded if x is a literal and non-negative. + # Otherwise, it is non-negative, and positive if x is positive. 
+ case pydop.SQRT: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, (int, float)) + and expr.inputs[0].value >= 0 + ): + sqrt_value: float = expr.inputs[0].value ** 0.5 + output_expr = LiteralExpression(sqrt_value, expr.data_type) + if arg_predicates[0].positive: + output_predicates.positive = True + output_predicates.not_negative = True + + case pydop.MONOTONIC: + v0: int | float | None = None + v1: int | float | None = None + v2: int | float | None = None + monotonic_result: bool + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, (int, float) + ): + v0 = expr.inputs[0].value + if isinstance(expr.inputs[1], LiteralExpression) and isinstance( + expr.inputs[1].value, (int, float) + ): + v1 = expr.inputs[1].value + if isinstance(expr.inputs[2], LiteralExpression) and isinstance( + expr.inputs[2].value, (int, float) + ): + v2 = expr.inputs[2].value + + # MONOTONIC(x, y, z), where x/y/z are all literals + # -> True if x <= y <= z, False otherwise + if v0 is not None and v1 is not None and v2 is not None: + monotonic_result = (v0 <= v1) and (v1 <= v2) + output_expr = LiteralExpression(monotonic_result, expr.data_type) + if monotonic_result: + output_predicates.positive = True + + # MONOTONIC(x, y, z), where x/y are literals + # -> if x <= y, then y <= z, otherwise False + elif v0 is not None and v1 is not None: + if v0 <= v1: + output_expr = CallExpression( + pydop.LEQ, expr.data_type, expr.inputs[1:] + ) + else: + output_expr = LiteralExpression(False, expr.data_type) + + # MONOTONIC(x, y, z), where y/z are literals + # -> if y <= z, then x <= y, otherwise False + elif v1 is not None and v2 is not None: + if v1 <= v2: + output_expr = CallExpression( + pydop.LEQ, expr.data_type, expr.inputs[:2] + ) + else: + output_expr = LiteralExpression(False, expr.data_type) + output_predicates.not_negative = True + + # XOR and LIKE are always non-negative + case pydop.BXR | pydop.LIKE: + 
output_predicates.not_negative = True + + # X & Y is False if any of the arguments are False-y literals, and True + # if all of the arguments are Truth-y literals. + case pydop.BAN: + if any( + isinstance(arg, LiteralExpression) and arg.value in [0, False, None] + for arg in expr.inputs + ): output_expr = LiteralExpression(False, expr.data_type) - output_predicates.not_negative = True - - # XOR and LIKE are always non-negative - case pydop.BXR | pydop.LIKE: - output_predicates.not_negative = True - - # X & Y is False if any of the arguments are False-y literals, and True - # if all of the arguments are Truth-y literals. - case pydop.BAN: - if any( - isinstance(arg, LiteralExpression) and arg.value in [0, False, None] - for arg in expr.inputs - ): - output_expr = LiteralExpression(False, expr.data_type) - if all( - isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] - for arg in expr.inputs - ): - output_expr = LiteralExpression(True, expr.data_type) - output_predicates.not_negative = True - - # X | Y is True if any of the arguments are Truth-y literals, and False - # if all of the arguments are False-y literals. - case pydop.BOR: - if any( - isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] - for arg in expr.inputs - ): - output_expr = LiteralExpression(True, expr.data_type) - if all( - isinstance(arg, LiteralExpression) and arg.value in [0, False, None] - for arg in expr.inputs - ): - output_expr = LiteralExpression(False, expr.data_type) - output_predicates.not_negative = True - - case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: - match (expr.inputs[0], expr.op, expr.inputs[1]): - # x > y is True if x is positive and y is a literal that is - # zero or negative. The same goes for x >= y. 
- case (_, pydop.GRT, LiteralExpression()) | ( - _, - pydop.GEQ, - LiteralExpression(), - ) if ( - isinstance(expr.inputs[1].value, (int, float, bool)) - and expr.inputs[1].value <= 0 - and arg_predicates[0].not_null - and arg_predicates[0].positive + if all( + isinstance(arg, LiteralExpression) + and arg.value not in [0, False, None] + for arg in expr.inputs ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates |= PredicateSet( - not_null=True, not_negative=True, positive=True - ) + output_predicates.not_negative = True - # x >= y is True if x is non-negative and y is a literal that is - # zero or negative. - case (_, pydop.GEQ, LiteralExpression()) if ( - isinstance(expr.inputs[1].value, (int, float, bool)) - and expr.inputs[1].value <= 0 - and arg_predicates[0].not_null - and arg_predicates[0].not_negative + # X | Y is True if any of the arguments are Truth-y literals, and False + # if all of the arguments are False-y literals. + case pydop.BOR: + if any( + isinstance(arg, LiteralExpression) + and arg.value not in [0, False, None] + for arg in expr.inputs ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates |= PredicateSet( - not_null=True, not_negative=True, positive=True - ) + if all( + isinstance(arg, LiteralExpression) and arg.value in [0, False, None] + for arg in expr.inputs + ): + output_expr = LiteralExpression(False, expr.data_type) + output_predicates.not_negative = True - # The rest of the case of x CMP y can be constant folded if both - # x and y are literals. - case (LiteralExpression(), _, LiteralExpression()): - match ( - expr.inputs[0].value, - expr.inputs[1].value, - expr.op, + # NOT(x) is True if x is a False-y literal, and False if x is a + # Truth-y literal. 
+ case pydop.NOT: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and expr.inputs[0].value is not None + ): + output_expr = LiteralExpression( + not bool(expr.inputs[0].value), expr.data_type + ) + output_predicates.positive = not bool(expr.inputs[0].value) + output_predicates.not_negative = True + pass + + case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: + match (expr.inputs[0], expr.op, expr.inputs[1]): + # x > y is True if x is positive and y is a literal that is + # zero or negative. The same goes for x >= y. + case (_, pydop.GRT, LiteralExpression()) | ( + _, + pydop.GEQ, + LiteralExpression(), + ) if ( + isinstance(expr.inputs[1].value, (int, float, bool)) + and expr.inputs[1].value <= 0 + and arg_predicates[0].not_null + and arg_predicates[0].positive ): - case (None, _, _) | (_, None, _): - output_expr = LiteralExpression(None, expr.data_type) - case (x, y, pydop.EQU): - output_expr = LiteralExpression(x == y, expr.data_type) - case (x, y, pydop.NEQ): - output_expr = LiteralExpression(x != y, expr.data_type) - case (x, y, pydop.LET) if isinstance( - x, (int, float, str, bool) - ) and isinstance(y, (int, float, str, bool)): - output_expr = LiteralExpression(x < y, expr.data_type) # type: ignore - case (x, y, pydop.LEQ) if isinstance( - x, (int, float, str, bool) - ) and isinstance(y, (int, float, str, bool)): - output_expr = LiteralExpression(x <= y, expr.data_type) # type: ignore - case (x, y, pydop.GRT) if isinstance( - x, (int, float, str, bool) - ) and isinstance(y, (int, float, str, bool)): - output_expr = LiteralExpression(x > y, expr.data_type) # type: ignore - case (x, y, pydop.GEQ) if isinstance( - x, (int, float, str, bool) - ) and isinstance(y, (int, float, str, bool)): - output_expr = LiteralExpression(x >= y, expr.data_type) # type: ignore - - case _: - pass - - output_predicates.not_negative = True - - # PRESENT(x) is True if x is non-null. 
- case pydop.PRESENT: - if arg_predicates[0].not_null: - output_expr = LiteralExpression(True, expr.data_type) - output_predicates.positive = True - output_predicates.not_null = True - output_predicates.not_negative = True + output_expr = LiteralExpression(True, expr.data_type) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + + # x >= y is True if x is non-negative and y is a literal + # that is zero or negative. + case (_, pydop.GEQ, LiteralExpression()) if ( + isinstance(expr.inputs[1].value, (int, float, bool)) + and expr.inputs[1].value <= 0 + and arg_predicates[0].not_null + and arg_predicates[0].not_negative + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + + # The rest of the case of x CMP y can be constant folded if + # both x and y are literals. + case (LiteralExpression(), _, LiteralExpression()): + match ( + expr.inputs[0].value, + expr.inputs[1].value, + expr.op, + ): + case (None, _, _) | (_, None, _): + output_expr = LiteralExpression(None, expr.data_type) + case (x, y, pydop.EQU): + output_expr = LiteralExpression(x == y, expr.data_type) + case (x, y, pydop.NEQ): + output_expr = LiteralExpression(x != y, expr.data_type) + case (x, y, pydop.LET) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x < y, expr.data_type) # type: ignore + case (x, y, pydop.LEQ) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x <= y, expr.data_type) # type: ignore + case (x, y, pydop.GRT) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x > y, expr.data_type) # type: ignore + case (x, y, pydop.GEQ) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr 
= LiteralExpression(x >= y, expr.data_type) # type: ignore + + case _: + pass - # ABSENT(x) is True if x is null. - case pydop.ABSENT: - if ( - isinstance(expr.inputs[0], LiteralExpression) - and expr.inputs[0].value is None - ): - output_expr = LiteralExpression(True, expr.data_type) - output_predicates.positive = True - output_predicates.not_null = True - output_predicates.not_negative = True - - # IFF(True, y, z) -> y (same if the first argument is guaranteed to be - # positive & non-null). - # IFF(False, y, z) -> z - # Otherwise, it inherits the intersection of the predicates of y and z. - case pydop.IFF: - if isinstance(expr.inputs[0], LiteralExpression): - if bool(expr.inputs[0].value): + output_predicates.not_negative = True + + # PRESENT(x) is True if x is non-null. + case pydop.PRESENT: + if arg_predicates[0].not_null: + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.positive = True + output_predicates.not_null = True + output_predicates.not_negative = True + + # ABSENT(x) is True if x is null. + case pydop.ABSENT: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and expr.inputs[0].value is None + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.positive = True + output_predicates.not_null = True + output_predicates.not_negative = True + + # IFF(True, y, z) -> y (same if the first argument is guaranteed to + # be positive & non-null). + # IFF(False, y, z) -> z + # Otherwise, uses the intersection of the predicates of y and z. 
+ case pydop.IFF: + if isinstance(expr.inputs[0], LiteralExpression): + if bool(expr.inputs[0].value): + output_expr = expr.inputs[1] + output_predicates |= arg_predicates[1] + else: + output_expr = expr.inputs[2] + output_predicates |= arg_predicates[2] + elif arg_predicates[0].not_null and arg_predicates[0].positive: output_expr = expr.inputs[1] output_predicates |= arg_predicates[1] else: - output_expr = expr.inputs[2] - output_predicates |= arg_predicates[2] - elif arg_predicates[0].not_null and arg_predicates[0].positive: - output_expr = expr.inputs[1] - output_predicates |= arg_predicates[1] - else: - output_predicates |= arg_predicates[1] & arg_predicates[2] - - # KEEP_IF(x, True) -> x - # KEEP_IF(x, False) -> None - case pydop.KEEP_IF: - if isinstance(expr.inputs[1], LiteralExpression): - if bool(expr.inputs[1].value): + output_predicates |= arg_predicates[1] & arg_predicates[2] + + # KEEP_IF(x, True) -> x + # KEEP_IF(x, False) -> None + case pydop.KEEP_IF: + if isinstance(expr.inputs[1], LiteralExpression): + if bool(expr.inputs[1].value): + output_expr = expr.inputs[0] + output_predicates |= arg_predicates[0] + else: + output_expr = LiteralExpression(None, expr.data_type) + output_predicates.not_negative = True + elif arg_predicates[1].not_null and arg_predicates[1].positive: output_expr = expr.inputs[0] - output_predicates |= arg_predicates[0] + output_predicates = arg_predicates[0] else: - output_expr = LiteralExpression(None, expr.data_type) - output_predicates.not_negative = True - elif arg_predicates[1].not_null and arg_predicates[1].positive: - output_expr = expr.inputs[0] - output_predicates = arg_predicates[0] - else: - output_predicates |= arg_predicates[0] & PredicateSet( - not_null=True, not_negative=True + output_predicates |= arg_predicates[0] & PredicateSet( + not_null=True, not_negative=True + ) + + self.stack.append(output_predicates) + return output_expr + + def simplify_window_call( + self, + expr: WindowCallExpression, + arg_predicates: 
list[PredicateSet], + ) -> RelationalExpression: + """ + TODO + """ + output_predicates: PredicateSet = PredicateSet() + output_expr: RelationalExpression = expr + no_frame: bool = not ( + expr.kwargs.get("cumulative", False) or "frame" in expr.kwargs + ) + match expr.op: + # RANKING & PERCENTILE are always non-null, non-negative, and + # positive. + case pydop.RANKING | pydop.PERCENTILE: + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True ) - return output_expr, output_predicates + # RELSUM and RELAVG retain the properties of their argument, but + # become nullable if there is a frame. + case pydop.RELSUM | pydop.RELAVG: + if arg_predicates[0].not_null and no_frame: + output_predicates.not_null = True + if arg_predicates[0].not_negative: + output_predicates.not_negative = True + if arg_predicates[0].positive: + output_predicates.positive = True -def simplify_window_call( - expr: WindowCallExpression, - arg_predicates: list[PredicateSet], -) -> tuple[RelationalExpression, PredicateSet]: - """ - TODO - """ - output_predicates: PredicateSet = PredicateSet() - no_frame: bool = not ( - expr.kwargs.get("cumulative", False) or "frame" in expr.kwargs - ) - match expr.op: - # RANKING & PERCENTILE are always non-null, non-negative, and positive. - case pydop.RANKING | pydop.PERCENTILE: - output_predicates |= PredicateSet( - not_null=True, not_negative=True, positive=True - ) - - # RELSUM and RELAVG retain the properties of their argument, but become - # nullable if there is a frame. - case pydop.RELSUM | pydop.RELAVG: - if arg_predicates[0].not_null and no_frame: - output_predicates.not_null = True - if arg_predicates[0].not_negative: + # RELSIZE is always non-negative, but is only non-null & positive if + # there is no frame. 
+ case pydop.RELSIZE: + if no_frame: + output_predicates.not_null = True + output_predicates.positive = True output_predicates.not_negative = True - if arg_predicates[0].positive: - output_predicates.positive = True - # RELSIZE is always non-negative, but is only non-null & positive if - # there is no frame. - case pydop.RELSIZE: - if no_frame: - output_predicates.not_null = True - output_predicates.positive = True - output_predicates.not_negative = True - - # RELCOUNT is always non-negative, but it is only non-null if there is - # no frame, and positive if there is no frame and the first argument - # is non-null. - case pydop.RELCOUNT: - if no_frame: - output_predicates.not_null = True - if arg_predicates[0].not_null: - output_predicates.positive = True - output_predicates.not_negative = True - return expr, output_predicates + # RELCOUNT is always non-negative, but it is only non-null if there + # is no frame, and positive if there is no frame and the first + # argument is non-null. + case pydop.RELCOUNT: + if no_frame: + output_predicates.not_null = True + if arg_predicates[0].not_null: + output_predicates.positive = True + output_predicates.not_negative = True + + self.stack.append(output_predicates) + return output_expr -def infer_literal_predicates(expr: LiteralExpression) -> PredicateSet: +class SimplificationVisitor(RelationalVisitor): + """ + TODO """ - Infers logical predicates from a literal expression. - Args: - `expr`: The literal expression to infer predicates from. + def __init__(self): + self.stack: list[dict[RelationalExpression, PredicateSet]] = [] + self.shuttle: SimplificationShuttle = SimplificationShuttle() - Returns: - A set of logical predicates inferred from the literal. 
- """ - output_predicates: PredicateSet = PredicateSet() - if expr.value is not None: - output_predicates.not_null = True - if isinstance(expr.value, (int, float)): - if expr.value >= 0: - output_predicates.not_negative = True - if expr.value > 0: - output_predicates.positive = True - return output_predicates + def reset(self): + self.stack.clear() + self.shuttle.reset() + def get_input_predicates( + self, node: RelationalNode + ) -> dict[RelationalExpression, PredicateSet]: + """ + TODO + """ + # Recursively invoke the procedure on all inputs to the node. + self.visit_inputs(node) + + # For each input, pop the predicates from the stack and add them + # to the input predicates dictionary, using the appropriate input alias. + input_predicates: dict[RelationalExpression, PredicateSet] = {} + for i in reversed(range(len(node.inputs))): + input_alias: str | None = node.default_input_aliases[i] + predicates: dict[RelationalExpression, PredicateSet] = self.stack.pop() + for expr, preds in predicates.items(): + input_predicates[add_input_name(expr, input_alias)] = preds + + return input_predicates + + def generic_visit( + self, node: RelationalNode + ) -> dict[RelationalExpression, PredicateSet]: + """ + TODO + """ + input_predicates: dict[RelationalExpression, PredicateSet] = ( + self.get_input_predicates(node) + ) + self.shuttle.input_predicates = input_predicates + self.shuttle.no_group_aggregate = not ( + isinstance(node, Aggregate) and not node.keys + ) + # Transform the expressions of the current node in-place. 
+ ref_expr: RelationalExpression + output_predicates: dict[RelationalExpression, PredicateSet] = {} + for name, expr in node.columns.items(): + ref_expr = ColumnReference(name, expr.data_type) + node.columns[name] = expr.accept_shuttle(self.shuttle) + output_predicates[ref_expr] = self.shuttle.stack.pop() + return output_predicates + + def visit_scan(self, node: Scan) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + self.stack.append(output_predicates) -def run_simplification( - expr: RelationalExpression, - input_predicates: dict[RelationalExpression, PredicateSet], - no_group_aggregate: bool, -) -> tuple[RelationalExpression, PredicateSet]: - """ - Runs the simplification on a single expression, applying any predicates - inferred from the input nodes to aid the process and inferring any new - predicates that apply to the resulting expression. + def visit_empty_singleton(self, node: EmptySingleton) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + self.stack.append(output_predicates) - Args: - `expr`: The expression to simplify. - `input_predicates`: A dictionary mapping input columns to the set of - predicates that are true for the column. - `no_group_aggregate`: A boolean indicating whether the expression is - part of an aggregate operation w/o keys, which affects how predicates - are inferred. - - Returns: - The simplified expression and a set of predicates that apply to the - resulting expression. 
- """ - new_args: list[RelationalExpression] - new_partitions: list[RelationalExpression] - new_orders: list[ExpressionSortInfo] - arg_predicates: list[PredicateSet] - output_predicates: PredicateSet = PredicateSet() - requires_rewrite: bool = False - - if isinstance(expr, LiteralExpression): - output_predicates = infer_literal_predicates(expr) - - if isinstance(expr, ColumnReference): - output_predicates = input_predicates.get(expr, PredicateSet()) - - if isinstance(expr, CallExpression): - new_args = [] - arg_predicates = [] - for arg in expr.inputs: - new_arg, new_preds = run_simplification( - arg, input_predicates, no_group_aggregate - ) - requires_rewrite |= new_arg is not arg - new_args.append(new_arg) - arg_predicates.append(new_preds) - if requires_rewrite: - expr = CallExpression(expr.op, expr.data_type, new_args) - expr, output_predicates = simplify_function_call( - expr, arg_predicates, no_group_aggregate + def visit_project(self, node: Project) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) ) + self.stack.append(output_predicates) - if isinstance(expr, WindowCallExpression): - new_args = [] - new_partitions = [] - new_orders = [] - arg_predicates = [] - for arg in expr.inputs: - new_arg, new_preds = run_simplification( - arg, input_predicates, no_group_aggregate - ) - requires_rewrite |= new_arg is not arg - new_args.append(new_arg) - arg_predicates.append(new_preds) - for partition in expr.partition_inputs: - new_partition, _ = run_simplification( - partition, input_predicates, no_group_aggregate - ) - requires_rewrite |= new_partition is not partition - new_partitions.append(new_partition) - for order in expr.order_inputs: - new_order, _ = run_simplification( - order.expr, input_predicates, no_group_aggregate - ) - requires_rewrite |= new_order is not order.expr - new_orders.append( - ExpressionSortInfo(new_order, order.ascending, order.nulls_first) - ) - if requires_rewrite: - expr = 
WindowCallExpression( - expr.op, - expr.data_type, - new_args, - new_partitions, - new_orders, - expr.kwargs, - ) - expr, output_predicates = simplify_window_call(expr, arg_predicates) - - return expr, output_predicates + def visit_filter(self, node: Filter) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + # Transform the filter condition in-place. + node._condition = node.condition.accept_shuttle(self.shuttle) + self.stack.append(output_predicates) + + def visit_join(self, node: Join) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + # Transform the join condition in-place. + node._condition = node.condition.accept_shuttle(self.shuttle) + # If the join is not an inner join, remove any not-null predicates + # from the RHS of the join. + if node.join_type != JoinType.INNER: + for expr, preds in output_predicates.items(): + if ( + isinstance(expr, ColumnReference) + and expr.input_name != node.default_input_aliases[0] + ): + preds.not_null = False + self.stack.append(output_predicates) + + def visit_limit(self, node: Limit) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + # Transform the order keys in-place. + for ordering_expr in node.orderings: + ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) + self.stack.append(output_predicates) + + def visit_root(self, node: RelationalRoot) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + node._ordered_columns = [ + (name, node.columns[name]) for name, _ in node.ordered_columns + ] + # Transform the order keys in-place. 
+ for ordering_expr in node.orderings: + ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) + self.stack.append(output_predicates) + + def visit_aggregate(self, node: Aggregate) -> None: + input_predicates: dict[RelationalExpression, PredicateSet] = ( + self.get_input_predicates(node) + ) + output_predicates: dict[RelationalExpression, PredicateSet] = {} + # Transform the keys & aggregates separately + self.shuttle.input_predicates = input_predicates + self.shuttle.no_group_aggregate = False + for name, expr in node.keys.items(): + ref_expr = ColumnReference(name, expr.data_type) + node.keys[name] = expr.accept_shuttle(self.shuttle) + output_predicates[ref_expr] = self.shuttle.stack.pop() + node.columns[name] = node.keys[name] + self.shuttle.no_group_aggregate = not node.keys + for name, expr in node.aggregations.items(): + ref_expr = ColumnReference(name, expr.data_type) + new_agg = expr.accept_shuttle(self.shuttle) + output_predicates[ref_expr] = self.shuttle.stack.pop() + assert isinstance(new_agg, CallExpression) + node.aggregations[name] = new_agg + node.columns[name] = node.aggregations[name] + self.stack.append(output_predicates) def simplify_expressions( node: RelationalNode, -) -> dict[RelationalExpression, PredicateSet]: +) -> None: """ - The main recursive procedure done to perform expression simplification on - a relational node and its descendants. The transformation is done in-place + Transforms the current node and all of its descendants in-place to simplify + any relational expressions. Args: `node`: The relational node to perform simplification on. - - Returns: - The predicates inferred from the output columns of the node. """ - # Recursively invoke the procedure on all inputs to the node. 
- input_predicates: dict[RelationalExpression, PredicateSet] = {} - for idx, input_node in enumerate(node.inputs): - input_alias: str | None = node.default_input_aliases[idx] - predicates = simplify_expressions(input_node) - for expr, preds in predicates.items(): - input_predicates[add_input_name(expr, input_alias)] = preds - - # Transform the expressions of the current node in-place. - ref_expr: RelationalExpression - output_predicates: dict[RelationalExpression, PredicateSet] = {} - match node: - case ( - Project() - | Filter() - | Join() - | Limit() - | RelationalRoot() - | Scan() - | EmptySingleton() - ): - for name, expr in node.columns.items(): - ref_expr = ColumnReference(name, expr.data_type) - node.columns[name], output_predicates[ref_expr] = run_simplification( - expr, input_predicates, False - ) - if isinstance(node, (Filter, Join)): - node._condition = run_simplification( - node.condition, input_predicates, False - )[0] - if isinstance(node, (RelationalRoot, Limit)): - node._orderings = [ - ExpressionSortInfo( - run_simplification(order_expr.expr, input_predicates, False)[0], - order_expr.ascending, - order_expr.nulls_first, - ) - for order_expr in node.orderings - ] - if isinstance(node, RelationalRoot): - node._ordered_columns = [ - (name, node.columns[name]) for name, _ in node.ordered_columns - ] - if isinstance(node, Join) and node.join_type != JoinType.INNER: - for expr, preds in output_predicates.items(): - if ( - isinstance(expr, ColumnReference) - and expr.input_name != node.default_input_aliases[0] - ): - preds.not_null = False - case Aggregate(): - for name, expr in node.keys.items(): - ref_expr = ColumnReference(name, expr.data_type) - node.keys[name], output_predicates[ref_expr] = run_simplification( - expr, input_predicates, False - ) - node.columns[name] = node.keys[name] - for name, expr in node.aggregations.items(): - ref_expr = ColumnReference(name, expr.data_type) - new_agg, output_predicates[ref_expr] = run_simplification( - expr, 
input_predicates, len(node.keys) == 0 - ) - assert isinstance(new_agg, CallExpression) - node.aggregations[name] = new_agg - node.columns[name] = node.aggregations[name] - - # For all other nodes, do not perform any simplification. - case _: - pass - - return output_predicates + simplifier: SimplificationVisitor = SimplificationVisitor() + node.accept(simplifier) diff --git a/pydough/relational/__init__.py b/pydough/relational/__init__.py index ff2cfb653..5f0e11839 100644 --- a/pydough/relational/__init__.py +++ b/pydough/relational/__init__.py @@ -19,6 +19,7 @@ "Project", "RelationalExpression", "RelationalExpressionDispatcher", + "RelationalExpressionShuttle", "RelationalExpressionVisitor", "RelationalNode", "RelationalRoot", @@ -37,6 +38,7 @@ ExpressionSortInfo, LiteralExpression, RelationalExpression, + RelationalExpressionShuttle, RelationalExpressionVisitor, WindowCallExpression, ) diff --git a/pydough/relational/relational_expressions/__init__.py b/pydough/relational/relational_expressions/__init__.py index 3eb8fc33d..487a043c3 100644 --- a/pydough/relational/relational_expressions/__init__.py +++ b/pydough/relational/relational_expressions/__init__.py @@ -14,6 +14,7 @@ "ExpressionSortInfo", "LiteralExpression", "RelationalExpression", + "RelationalExpressionShuttle", "RelationalExpressionVisitor", "WindowCallExpression", ] @@ -27,5 +28,6 @@ from .correlated_reference_finder import CorrelatedReferenceFinder from .expression_sort_info import ExpressionSortInfo from .literal_expression import LiteralExpression +from .relational_expression_shuttle import RelationalExpressionShuttle from .relational_expression_visitor import RelationalExpressionVisitor from .window_call_expression import WindowCallExpression diff --git a/pydough/relational/relational_nodes/relational_visitor.py b/pydough/relational/relational_nodes/relational_visitor.py index 7f8ebe79d..2a138b719 100644 --- a/pydough/relational/relational_nodes/relational_visitor.py +++ 
b/pydough/relational/relational_nodes/relational_visitor.py @@ -8,6 +8,7 @@ from abc import ABC, abstractmethod +from .abstract_node import RelationalNode from .aggregate import Aggregate from .empty_singleton import EmptySingleton from .filter import Filter @@ -36,7 +37,7 @@ def reset(self) -> None: Clear any internal state to allow reusing this visitor. """ - def visit_inputs(self, node) -> None: + def visit_inputs(self, node: RelationalNode) -> None: """ Visit all inputs of the provided node. This is a helper method to avoid repeating the same code in each visit method. From 94375ce8836606f9e23aba9ae33bb8b9c8fd55b2 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 23 Jul 2025 15:41:58 -0400 Subject: [PATCH 067/143] Fixing comments --- .../conversion/relational_simplification.py | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index dc1f9e1b1..72883b709 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -268,28 +268,28 @@ def simplify_function_call( union_set: PredicateSet = PredicateSet.union(arg_predicates) intersect_set: PredicateSet = PredicateSet.intersect(arg_predicates) - # If the call has null propagating rules, all of hte arguments are non-null, - # the output is guaranteed to be non-null. + # If the call has null propagating rules, all of hte arguments are + # non-null, the output is guaranteed to be non-null. if expr.op in NULL_PROPAGATING_OPS: if intersect_set.not_null: output_predicates.not_null = True match expr.op: case pydop.COUNT | pydop.NDISTINCT: - # COUNT(n), COUNT(*), and NDISTINCT(n) are guaranteed to be non-null - # and non-negative. + # COUNT(n), COUNT(*), and NDISTINCT(n) are guaranteed to be + # non-null and non-negative. 
output_predicates.not_null = True output_predicates.not_negative = True - # The output if COUNT(*) is positive if unless doing a no-groupby - # aggregation. Same goes for calling COUNT or NDISTINCT ona non-null - # column. + # The output if COUNT(*) is positive if unless doing a + # no-groupby aggregation. Same goes for calling COUNT or + # NDISTINCT ona non-null column. if not no_group_aggregate: if len(expr.inputs) == 0 or arg_predicates[0].not_null: output_predicates.positive = True - # COUNT(x) where x is non-null can be rewritten as COUNT(*), which - # has the same positive rule as before. + # COUNT(x) where x is non-null can be rewritten as COUNT(*), + # which has the same positive rule as before. elif ( expr.op == pydop.COUNT and len(expr.inputs) == 1 @@ -299,8 +299,8 @@ def simplify_function_call( output_predicates.positive = True output_expr = CallExpression(pydop.COUNT, expr.data_type, []) - # All of these operators are non-null aor non-negative if their first - # argument is. + # All of these operators are non-null aor non-negative if their + # first argument is. case ( pydop.SUM | pydop.AVG @@ -315,8 +315,8 @@ def simplify_function_call( ) # The result of addition is non-negative or positive if all the - # operands are. It is also positive if all the operands are non-negative - # and at least one of them is positive. + # operands are. It is also positive if all the operands are + # non-negative and at least one of them is positive. case pydop.ADD: output_predicates |= intersect_set & PredicateSet( not_negative=True, positive=True @@ -324,16 +324,16 @@ def simplify_function_call( if intersect_set.not_negative and union_set.positive: output_predicates.positive = True - # The result of multiplication is non-negative or positive if all the - # operands are. + # The result of multiplication is non-negative or positive if all + # the operands are. 
case pydop.MUL: output_predicates |= intersect_set & PredicateSet( not_negative=True, positive=True ) # The result of division is non-negative or positive if all the - # operands are, and is also non-null if both operands are non-null and - # the second operand is positive. + # operands are, and is also non-null if both operands are non-null + # and the second operand is positive. case pydop.DIV: output_predicates |= intersect_set & PredicateSet( not_negative=True, positive=True From 4914784b7a7030f5ea69f4f9510eba938ef958ad Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 23 Jul 2025 15:44:35 -0400 Subject: [PATCH 068/143] [RUN CI] From f150cd587a35615fbb9c9cc2b37297e11e950f48 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:18:46 -0400 Subject: [PATCH 069/143] Adding docstrings --- .../conversion/relational_simplification.py | 121 +++++++++++++----- 1 file changed, 92 insertions(+), 29 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 72883b709..f098da6eb 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -161,7 +161,24 @@ def intersect(predicates: list["PredicateSet"]) -> "PredicateSet": class SimplificationShuttle(RelationalExpressionShuttle): """ - TODO + Shuttle implementation for simplifying relational expressions. Has three + sources of state used to determine how to simplify expressions: + + - `input_predicates`: A dictionary mapping column references to + the corresponding predicate sets for all of the columns that are used as + inputs to all of the expressions in the current relational node (e.g. from + the inputs to the node). This needs to be set before the shuttle is + used, and the default is an empty dictionary. + - `no_group_aggregate`: A boolean indicating whether the current + transformation is being done within the context of an aggregation without + grouping keys. 
This is important because some aggregation functions will + have different behaviors with/without grouping keys. For example, COUNT(*) + is always positive if there are grouping keys, but if there are no + grouping keys, the answer could be 0. This needs to be set before the + shuttle is used, and the default is False. + - `stack`: A stack of predicate sets corresponding to all inputs to the + current expression. Used for simplifying function calls by first + simplifying their inputs and placing their predicate sets on the stack. """ def __init__(self): @@ -261,7 +278,22 @@ def simplify_function_call( no_group_aggregate: bool, ) -> RelationalExpression: """ - TODO + Procedure to simplify a function call expression based on the operator + and the predicates of its arguments. This assumes that the arguments + have already been simplified. + + Args: + `expr`: The CallExpression to simplify, whose arguments have already + been simplified. + `arg_predicates`: A list of PredicateSet objects corresponding to + the predicates of the arguments of the expression. + `no_group_aggregate`: Whether the expression is part of a no-group + aggregate. + + Returns: + The simplified expression with the predicates updated based on the + simplification rules. The predicates for the output are placed on + the stack. """ output_expr: RelationalExpression = expr output_predicates: PredicateSet = PredicateSet() @@ -698,7 +730,20 @@ def simplify_window_call( arg_predicates: list[PredicateSet], ) -> RelationalExpression: """ - TODO + Procedure to simplify a window call expression based on the operator + and the predicates of its arguments. This assumes that the arguments + have already been simplified. + + Args: + `expr`: The WindowCallExpression to simplify, whose arguments have + already been simplified. + `arg_predicates`: A list of PredicateSet objects corresponding to + the predicates of the arguments of the expression. 
+ + Returns: + The simplified expression with the predicates updated based on + the simplification rules. The predicates for the output are placed + on the stack. """ output_predicates: PredicateSet = PredicateSet() output_expr: RelationalExpression = expr @@ -747,7 +792,12 @@ def simplify_window_call( class SimplificationVisitor(RelationalVisitor): """ - TODO + Relational visitor implementation that simplifies relational expressions + within the relational tree and its subtrees in-place. The visitor first + transforms all the subtrees and collects predicate set information for the + output columns of each node, then uses those predicates to simplify the + expressions of the current node. The predicates for the output predicates of + the current node are placed on the stack. """ def __init__(self): @@ -762,11 +812,19 @@ def get_input_predicates( self, node: RelationalNode ) -> dict[RelationalExpression, PredicateSet]: """ - TODO + Recursively simplifies the inputs to the current node and collects + the predicates for each column from all of the inputs to the current + node. + + Args: + `node`: The current relational node whose inputs are being + simplified. + + Returns: + A dictionary mapping each input column reference from a column from + an input to the current node to the set of its inferred predicates. """ - # Recursively invoke the procedure on all inputs to the node. self.visit_inputs(node) - # For each input, pop the predicates from the stack and add them # to the input predicates dictionary, using the appropriate input alias. input_predicates: dict[RelationalExpression, PredicateSet] = {} @@ -782,14 +840,29 @@ def generic_visit( self, node: RelationalNode ) -> dict[RelationalExpression, PredicateSet]: """ - TODO + The generic pattern for relational simplification used by most of the + relational nodes as a base. 
It simplifies all descendants of the current + node, and uses the predicates from the inputs to transform all of the + expressions of the current node in-place. The predicates for the output + columns of the current node are returned as a dictionary mapping each + output column reference to its set of predicates. + + Args: + `node`: The current relational node to simplify. + + Returns: + A dictionary mapping each output column reference from the current + node to the set of its inferred predicates. """ + # Simplify the inputs to the current node and collect the predicates + # for each column from the inputs. input_predicates: dict[RelationalExpression, PredicateSet] = ( self.get_input_predicates(node) ) + # Set the input predicates and no-group-aggregate state for the shuttle. self.shuttle.input_predicates = input_predicates - self.shuttle.no_group_aggregate = not ( - isinstance(node, Aggregate) and not node.keys + self.shuttle.no_group_aggregate = ( + isinstance(node, Aggregate) and len(node.keys) == 0 ) # Transform the expressions of the current node in-place. 
ref_expr: RelationalExpression @@ -865,26 +938,16 @@ def visit_root(self, node: RelationalRoot) -> None: self.stack.append(output_predicates) def visit_aggregate(self, node: Aggregate) -> None: - input_predicates: dict[RelationalExpression, PredicateSet] = ( - self.get_input_predicates(node) + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) ) - output_predicates: dict[RelationalExpression, PredicateSet] = {} - # Transform the keys & aggregates separately - self.shuttle.input_predicates = input_predicates - self.shuttle.no_group_aggregate = False - for name, expr in node.keys.items(): - ref_expr = ColumnReference(name, expr.data_type) - node.keys[name] = expr.accept_shuttle(self.shuttle) - output_predicates[ref_expr] = self.shuttle.stack.pop() - node.columns[name] = node.keys[name] - self.shuttle.no_group_aggregate = not node.keys - for name, expr in node.aggregations.items(): - ref_expr = ColumnReference(name, expr.data_type) - new_agg = expr.accept_shuttle(self.shuttle) - output_predicates[ref_expr] = self.shuttle.stack.pop() - assert isinstance(new_agg, CallExpression) - node.aggregations[name] = new_agg - node.columns[name] = node.aggregations[name] + # Transform the keys & aggregations to match the columns. 
+ for name in node.keys: + node.keys[name] = node.columns[name] + for name in node.aggregations: + expr = node.columns[name] + assert isinstance(expr, CallExpression) + node.aggregations[name] = expr self.stack.append(output_predicates) From 8d0fc6bc6281d3122a74646429c95946d5c05f44 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:25:44 -0400 Subject: [PATCH 070/143] Revisions --- .../conversion/relational_simplification.py | 70 ++++++++++--------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index f098da6eb..1cba39855 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -111,52 +111,56 @@ def intersect(predicates: list["PredicateSet"]) -> "PredicateSet": NULL_PROPAGATING_OPS: set[pydop.PyDoughOperator] = { + pydop.ABS, pydop.ADD, - pydop.SUB, - pydop.MUL, pydop.BAN, pydop.BOR, - pydop.NOT, - pydop.LOWER, - pydop.UPPER, - pydop.LENGTH, - pydop.STRIP, - pydop.REPLACE, - pydop.FIND, - pydop.ABS, + pydop.BXR, pydop.CEIL, - pydop.FLOOR, - pydop.ROUND, + pydop.CONTAINS, + pydop.DATEDIFF, + pydop.DAY, + pydop.DAYNAME, + pydop.DAYOFWEEK, + pydop.ENDSWITH, pydop.EQU, - pydop.NEQ, + pydop.FIND, + pydop.FLOOR, pydop.GEQ, pydop.GRT, - pydop.LET, + pydop.HOUR, + pydop.JOIN_STRINGS, + pydop.LARGEST, + pydop.LENGTH, pydop.LEQ, - pydop.BXR, - pydop.STARTSWITH, - pydop.ENDSWITH, - pydop.CONTAINS, + pydop.LET, pydop.LIKE, - pydop.SIGN, - pydop.SMALLEST, - pydop.LARGEST, - pydop.IFF, - pydop.YEAR, - pydop.MONTH, - pydop.DAY, - pydop.HOUR, + pydop.LOWER, + pydop.LPAD, pydop.MINUTE, + pydop.MONOTONIC, + pydop.MONTH, + pydop.MUL, + pydop.NEQ, + pydop.NOT, + pydop.REPLACE, + pydop.ROUND, + pydop.RPAD, pydop.SECOND, - pydop.DATEDIFF, - pydop.DAYNAME, - pydop.DAYOFWEEK, + pydop.SIGN, pydop.SLICE, - pydop.LPAD, - pydop.RPAD, - pydop.MONOTONIC, - pydop.JOIN_STRINGS, + pydop.SMALLEST, + 
pydop.STARTSWITH, + pydop.STRIP, + pydop.SUB, + pydop.UPPER, + pydop.YEAR, } +""" +A set of operators that only output null if one of the inputs is null. This set +is significant because it means that if all of the inputs to a function are +guaranteed to be non-null, the output is guaranteed to be non-null as well. +""" class SimplificationShuttle(RelationalExpressionShuttle): From 22a94ab9223aa1cfc58f90a5047ae50c178fa8cf Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:30:51 -0400 Subject: [PATCH 071/143] Stack cleanup --- pydough/conversion/relational_simplification.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 1cba39855..cdaaff5a6 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -901,6 +901,7 @@ def visit_filter(self, node: Filter) -> None: ) # Transform the filter condition in-place. node._condition = node.condition.accept_shuttle(self.shuttle) + self.shuttle.stack.pop() self.stack.append(output_predicates) def visit_join(self, node: Join) -> None: @@ -909,6 +910,7 @@ def visit_join(self, node: Join) -> None: ) # Transform the join condition in-place. node._condition = node.condition.accept_shuttle(self.shuttle) + self.shuttle.stack.pop() # If the join is not an inner join, remove any not-null predicates # from the RHS of the join. if node.join_type != JoinType.INNER: @@ -927,6 +929,7 @@ def visit_limit(self, node: Limit) -> None: # Transform the order keys in-place. for ordering_expr in node.orderings: ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) + self.shuttle.stack.pop() self.stack.append(output_predicates) def visit_root(self, node: RelationalRoot) -> None: @@ -939,6 +942,7 @@ def visit_root(self, node: RelationalRoot) -> None: # Transform the order keys in-place. 
for ordering_expr in node.orderings: ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) + self.shuttle.stack.pop() self.stack.append(output_predicates) def visit_aggregate(self, node: Aggregate) -> None: From a9716763b91edff7115ce3f0acd2acaed7d293a4 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:38:00 -0400 Subject: [PATCH 072/143] Adding additional shuttle framework --- pydough/conversion/relational_converter.py | 14 +++++++--- .../conversion/relational_simplification.py | 27 ++++++++++++++++--- .../relational_expression_shuttle.py | 6 +++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 6c1e86694..8ddf78c9a 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -43,6 +43,7 @@ LiteralExpression, Project, RelationalExpression, + RelationalExpressionShuttle, RelationalNode, RelationalRoot, Scan, @@ -1405,7 +1406,9 @@ def confirm_root(node: RelationalNode) -> RelationalRoot: def optimize_relational_tree( - root: RelationalRoot, configs: PyDoughConfigs + root: RelationalRoot, + configs: PyDoughConfigs, + additional_shuttles: list[RelationalExpressionShuttle], ) -> RelationalRoot: """ Runs optimize on the relational tree, including pushing down filters and @@ -1414,6 +1417,8 @@ def optimize_relational_tree( Args: `root`: the relational root to optimize. `configs`: the configuration settings to use during optimization. + `additional_shuttles`: additional relational expression shuttles to use + for expression simplification. Returns: The optimized relational root. @@ -1468,7 +1473,7 @@ def optimize_relational_tree( # pullup and pushdown and so on. 
for _ in range(2): root = confirm_root(pullup_projections(root)) - simplify_expressions(root) + simplify_expressions(root, additional_shuttles) root._input = push_filters(root.input, set()) root = ColumnPruner().prune_unused_columns(root) @@ -1535,6 +1540,9 @@ def convert_ast_to_relational( raw_result: RelationalRoot = postprocess_root(node, columns, hybrid, output) # Invoke the optimization procedures on the result to clean up the tree. - optimized_result: RelationalRoot = optimize_relational_tree(raw_result, configs) + additional_shuttles: list[RelationalExpressionShuttle] = [] + optimized_result: RelationalRoot = optimize_relational_tree( + raw_result, configs, additional_shuttles + ) return optimized_result diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index cdaaff5a6..f9d8ef4c4 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -804,13 +804,18 @@ class SimplificationVisitor(RelationalVisitor): the current node are placed on the stack. 
""" - def __init__(self): + def __init__(self, additional_shuttles: list[RelationalExpressionShuttle]): self.stack: list[dict[RelationalExpression, PredicateSet]] = [] self.shuttle: SimplificationShuttle = SimplificationShuttle() + self.additional_shuttles: list[RelationalExpressionShuttle] = ( + additional_shuttles + ) def reset(self): self.stack.clear() self.shuttle.reset() + for shuttle in self.additional_shuttles: + shuttle.reset() def get_input_predicates( self, node: RelationalNode @@ -873,8 +878,11 @@ def generic_visit( output_predicates: dict[RelationalExpression, PredicateSet] = {} for name, expr in node.columns.items(): ref_expr = ColumnReference(name, expr.data_type) - node.columns[name] = expr.accept_shuttle(self.shuttle) + expr = expr.accept_shuttle(self.shuttle) output_predicates[ref_expr] = self.shuttle.stack.pop() + for shuttle in self.additional_shuttles: + expr = expr.accept_shuttle(shuttle) + node.columns[name] = expr return output_predicates def visit_scan(self, node: Scan) -> None: @@ -902,6 +910,8 @@ def visit_filter(self, node: Filter) -> None: # Transform the filter condition in-place. node._condition = node.condition.accept_shuttle(self.shuttle) self.shuttle.stack.pop() + for shuttle in self.additional_shuttles: + node._condition = node.condition.accept_shuttle(shuttle) self.stack.append(output_predicates) def visit_join(self, node: Join) -> None: @@ -911,6 +921,8 @@ def visit_join(self, node: Join) -> None: # Transform the join condition in-place. node._condition = node.condition.accept_shuttle(self.shuttle) self.shuttle.stack.pop() + for shuttle in self.additional_shuttles: + node._condition = node.condition.accept_shuttle(shuttle) # If the join is not an inner join, remove any not-null predicates # from the RHS of the join. 
if node.join_type != JoinType.INNER: @@ -930,6 +942,8 @@ def visit_limit(self, node: Limit) -> None: for ordering_expr in node.orderings: ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) self.shuttle.stack.pop() + for shuttle in self.additional_shuttles: + ordering_expr.expr = ordering_expr.expr.accept_shuttle(shuttle) self.stack.append(output_predicates) def visit_root(self, node: RelationalRoot) -> None: @@ -943,6 +957,8 @@ def visit_root(self, node: RelationalRoot) -> None: for ordering_expr in node.orderings: ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) self.shuttle.stack.pop() + for shuttle in self.additional_shuttles: + ordering_expr.expr = ordering_expr.expr.accept_shuttle(shuttle) self.stack.append(output_predicates) def visit_aggregate(self, node: Aggregate) -> None: @@ -961,6 +977,7 @@ def visit_aggregate(self, node: Aggregate) -> None: def simplify_expressions( node: RelationalNode, + additional_shuttles: list[RelationalExpressionShuttle], ) -> None: """ Transforms the current node and all of its descendants in-place to simplify @@ -968,6 +985,10 @@ def simplify_expressions( Args: `node`: The relational node to perform simplification on. + `additional_shuttles`: A list of additional shuttles to apply to the + expressions of the node and its descendants. These shuttles are applied + after the simplification shuttle, and can be used to perform additional + transformations on the expressions. 
""" - simplifier: SimplificationVisitor = SimplificationVisitor() + simplifier: SimplificationVisitor = SimplificationVisitor(additional_shuttles) node.accept(simplifier) diff --git a/pydough/relational/relational_expressions/relational_expression_shuttle.py b/pydough/relational/relational_expressions/relational_expression_shuttle.py index d43642e35..e67326c41 100644 --- a/pydough/relational/relational_expressions/relational_expression_shuttle.py +++ b/pydough/relational/relational_expressions/relational_expression_shuttle.py @@ -23,6 +23,12 @@ class RelationalExpressionShuttle(ABC): at the end of each visit. """ + def reset(self): + """ + Reset the shuttle to its initial state. + This is useful if the shuttle is reused for multiple visits. + """ + def visit_call_expression( self, call_expression: CallExpression ) -> RelationalExpression: From 9c6caa21484780430af5fe4725c3a7e81e3778f3 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:45:39 -0400 Subject: [PATCH 073/143] [RUN CI] From 95e59d10f3a1b829de6d09fff472e1391fcf94d5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:47:33 -0400 Subject: [PATCH 074/143] [RUN CI] From b63c5d40ad33a3b8937a1eee158315231249c110 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 31 Jul 2025 00:36:06 -0400 Subject: [PATCH 075/143] Added more simplfication patterns to tests --- .../conversion/relational_simplification.py | 53 ++++++----- tests/test_pipeline_defog_custom.py | 94 ++++++++++++++++++- tests/test_plan_refsols/simplification_2.txt | 5 +- tests/test_plan_refsols/simplification_3.txt | 7 +- .../simplification_2_ansi.sql | 15 ++- .../simplification_2_sqlite.sql | 15 ++- .../simplification_3_ansi.sql | 58 +++++++++++- .../simplification_3_sqlite.sql | 87 ++++++++++++++++- 8 files changed, 290 insertions(+), 44 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index f9d8ef4c4..a1e20c7df 100644 --- 
a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -382,31 +382,34 @@ def simplify_function_call( output_predicates.not_null = True case pydop.DEFAULT_TO: - # DEFAULT_TO(None, x) -> x - if ( - isinstance(expr.inputs[0], LiteralExpression) - and expr.inputs[0].value is None - ): - if len(expr.inputs) == 2: - output_expr = expr.inputs[1] - output_predicates = arg_predicates[1] - else: - output_expr = CallExpression( - pydop.DEFAULT_TO, expr.data_type, expr.inputs[1:] - ) - output_predicates |= PredicateSet.intersect(arg_predicates[1:]) - - # DEFAULT_TO(x, y) -> x if x is non-null. - elif arg_predicates[0].not_null: - output_expr = expr.inputs[0] - output_predicates |= arg_predicates[0] - - # Otherwise, it is non-null if any of the arguments are non-null, - # and gains any predicates that all the arguments have in common. + # Modify the list of arguments by removing any that are None, + # and stopping once we find the first argument that has is + # non-null. + new_args: list[RelationalExpression] = [] + new_predicates: list[PredicateSet] = [] + for i, arg in enumerate(expr.inputs): + if isinstance(arg, LiteralExpression) and arg.value is None: + continue + new_args.append(arg) + new_predicates.append(arg_predicates[i]) + if arg_predicates[i].not_null: + break + if len(new_args) == 0: + # If all inputs are None, the output is None. + output_expr = LiteralExpression(None, expr.data_type) + elif len(new_args) == 1: + # If there is only one input, the output is that input. + output_expr = new_args[0] + output_predicates |= new_predicates[0] else: - if union_set.not_null: + # If there are multiple inputs, the output is a new + # DEFAULT_TO expression with the non-None inputs. 
+ output_expr = CallExpression( + pydop.DEFAULT_TO, expr.data_type, new_args + ) + output_predicates = PredicateSet.intersect(new_predicates) + if PredicateSet.union(new_predicates).not_null: output_predicates.not_null = True - output_predicates |= intersect_set # ABS(x) -> x if x is positive or non-negative. At hte very least, we # know it is always non-negative. @@ -552,8 +555,8 @@ def simplify_function_call( output_expr = LiteralExpression(False, expr.data_type) output_predicates.not_negative = True - # XOR and LIKE are always non-negative - case pydop.BXR | pydop.LIKE: + # LIKE is always non-negative + case pydop.LIKE: output_predicates.not_negative = True # X & Y is False if any of the arguments are False-y literals, and True diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index 58265f72b..f85afa9c2 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -1712,6 +1712,16 @@ def get_day_of_week( " s35 = True == False," # -> False " s36 = True != False," # -> True " s37 = SQRT(9)," # -> 3.0 + " s38 = COUNT(customers) == None," # -> None + " s39 = None >= COUNT(customers)," # -> None + " s40 = COUNT(customers) > None," # -> None + " s41 = None < COUNT(customers)," # -> None + " s42 = None <= COUNT(customers)," # -> None + " s43 = None + COUNT(customers)," # -> None + " s44 = COUNT(customers) - None," # -> None + " s45 = None * COUNT(customers)," # -> None + " s46 = COUNT(customers) / None," # -> None + " s47 = ABS(DEFAULT_TO(LIKE(DEFAULT_TO(MAX(customers.name), ''), '%r%'), 1))" # -> COALESCE(MAX(sbcustname), '') LIKE '%r%' ")", "Broker", lambda: pd.DataFrame( @@ -1754,6 +1764,16 @@ def get_day_of_week( "s35": [0], "s36": [1], "s37": [3.0], + "s38": [None], + "s39": [None], + "s40": [None], + "s41": [None], + "s42": [None], + "s43": [None], + "s44": [None], + "s45": [None], + "s46": [None], + "s47": [1], } ), "simplification_2", @@ -1762,6 +1782,18 @@ def get_day_of_week( ), 
pytest.param( PyDoughPandasTest( + "cust_info = customers.CALCULATE(p=DEFAULT_TO(INTEGER(postal_code), 0))" + " .CALCULATE(" + " rank = RANKING(by=name.ASC())," + " rsum1 = DEFAULT_TO(RELSUM(ABS(p)), 0.1)," + " rsum2 = DEFAULT_TO(RELSUM(ABS(p), by=name.ASC(), cumulative=True), 0.1)," + " ravg1 = DEFAULT_TO(RELAVG(ABS(p)), 0.1)," + " ravg2 = DEFAULT_TO(RELAVG(ABS(p), by=name.ASC(), frame=(None, -1)), 0.1)," + " rcnt1 = DEFAULT_TO(RELCOUNT(INTEGER(postal_code)), 0.1)," + " rcnt2 = DEFAULT_TO(RELCOUNT(INTEGER(postal_code), by=name.ASC(), cumulative=True), 0.1)," + " rsiz1 = DEFAULT_TO(RELSIZE(), 0.1)," + " rsiz2 = DEFAULT_TO(RELSIZE(by=name.ASC(), frame=(1, None)), 0.1)," + ")\n" "result = Broker.CALCULATE(" " s00 = MONOTONIC(1, 2, 3)," # -> True " s01 = MONOTONIC(1, 1, 1)," # -> True @@ -1769,10 +1801,37 @@ def get_day_of_week( " s03 = MONOTONIC(1, 4, 3)," # -> False " s04 = MONOTONIC(1, 2, 1)," # -> False " s05 = MONOTONIC(1, 0, 1)," # -> False - " s06 = MONOTONIC(1, LENGTH('foo'), COUNT(customers))," # -> 3 <= COUNT(customers) - " s07 = MONOTONIC(10, LENGTH('foo'), COUNT(customers))," # False - " s08 = MONOTONIC(COUNT(customers), LENGTH('foobar'), 9)," # -> COUNT(customers) <= 6 - " s09 = MONOTONIC(COUNT(customers), LENGTH('foobar'), 5)," # -> False + " s06 = MONOTONIC(1, LENGTH('foo'), COUNT(cust_info))," # -> 3 <= COUNT(*) + " s07 = MONOTONIC(10, LENGTH('foo'), COUNT(cust_info))," # False + " s08 = MONOTONIC(COUNT(cust_info), LENGTH('foobar'), 9)," # -> COUNT(*) <= 6 + " s09 = MONOTONIC(COUNT(cust_info), LENGTH('foobar'), 5)," # -> False + " s10 = 13 * 7," # -> 91 + " s11 = 42 * LENGTH('')," # -> 0 + " s12 = 42 + LENGTH('fizzbuzz')," # -> 50 + " s13 = 50 - 15," # -> 35 + " s14 = 50 / 2," # -> 25 + " s15 = ABS(COUNT(cust_info) * -0.75)," # -> not simplified + " s16 = DEFAULT_TO(10, COUNT(cust_info))," # -> 10 + " s17 = DEFAULT_TO(None, None, None, COUNT(cust_info))," # -> COUNT(*) + " s18 = DEFAULT_TO(None, None, COUNT(cust_info), None, -1)," # -> COUNT(*) + " s19 
= STARTSWITH('', 'a')," # -> False + " s20 = STARTSWITH('a', '')," # -> True + " s21 = ENDSWITH('', 'a')," # -> False + " s22 = ENDSWITH('a', '')," # -> True + " s23 = CONTAINS('', 'a')," # -> False + " s24 = CONTAINS('a', '')," # -> True + " s25 = ABS(QUANTILE(ABS(INTEGER(cust_info.postal_code)), 0.25))," # -> QUANTILE(ABS(INTEGER(cust_info.postal_code)), 0.25) + " s26 = ABS(MEDIAN(ABS(INTEGER(cust_info.postal_code))))," # -> MEDIAN(ABS(INTEGER(cust_info.postal_code))) + " s27 = ABS(MIN(cust_info.rank))," # -> MIN(cust_info.rank) + " s28 = ABS(MAX(cust_info.rank))," # -> MAX(cust_info.rank) + " s29 = ABS(ANYTHING(cust_info.rsum1))," # -> ANYTHING(cust_info.rsum1) + " s30 = ROUND(ABS(SUM(cust_info.rsum2)), 2)," # -> ROUND(SUM(cust_info.rsum2), 2) + " s31 = ABS(ANYTHING(cust_info.ravg1))," # -> ANYTHING(cust_info.ravg1) + " s32 = ROUND(ABS(SUM(cust_info.ravg2)), 2)," # -> ROUND(SUM(cust_info.ravg2), 2) + " s33 = ABS(ANYTHING(cust_info.rcnt1))," # -> ANYTHING(cust_info.rcnt1) + " s34 = ROUND(ABS(SUM(cust_info.rcnt2)), 2)," # -> ROUND(SUM(cust_info.rcnt2), 2) + " s35 = ABS(ANYTHING(cust_info.rsiz1))," # -> ANYTHING(cust_info.rsiz1) + " s36 = ROUND(ABS(SUM(cust_info.rsiz2)), 2)," # -> ROUND(SUM(cust_info.rsiz2), 2) ")", "Broker", lambda: pd.DataFrame( @@ -1787,6 +1846,33 @@ def get_day_of_week( "s07": [0], "s08": [0], "s09": [0], + "s10": [91], + "s11": [0], + "s12": [50], + "s13": [35], + "s14": [25.0], + "s15": [15.0], + "s16": [10], + "s17": [20], + "s18": [20], + "s19": [0], + "s20": [1], + "s21": [0], + "s22": [1], + "s23": [0], + "s24": [1], + "s25": [10002], + "s26": [54050.5], + "s27": [1], + "s28": [20], + "s29": [1027021], + "s30": [9096414.0], + "s31": [51351.05], + "s32": [802375.94], + "s33": [20], + "s34": [210.0], + "s35": [20], + "s36": [190.0], } ), "simplification_3", diff --git a/tests/test_plan_refsols/simplification_2.txt b/tests/test_plan_refsols/simplification_2.txt index 4e9433c17..f15a3a4fc 100644 --- 
a/tests/test_plan_refsols/simplification_2.txt +++ b/tests/test_plan_refsols/simplification_2.txt @@ -1,2 +1,3 @@ -ROOT(columns=[('s00', True:bool), ('s01', False:bool), ('s02', True:bool), ('s03', False:bool), ('s04', True:bool), ('s05', False:bool), ('s06', None:bool), ('s07', None:bool), ('s08', None:bool), ('s09', None:bool), ('s10', None:bool), ('s11', None:bool), ('s12', False:bool), ('s13', True:bool), ('s14', False:bool), ('s15', False:bool), ('s16', True:bool), ('s17', True:bool), ('s18', True:bool), ('s19', False:bool), ('s20', True:bool), ('s21', False:bool), ('s22', True:bool), ('s23', False:bool), ('s24', False:bool), ('s25', True:bool), ('s26', True:bool), ('s27', False:bool), ('s28', True:bool), ('s29', False:bool), ('s30', 8:numeric), ('s31', 'alphabet':string), ('s32', 'SOUP':string), ('s33', True:bool), ('s34', False:bool), ('s35', False:bool), ('s36', True:bool), ('s37', 3.0:numeric)], orderings=[]) - EMPTYSINGLETON() +ROOT(columns=[('s00', True:bool), ('s01', False:bool), ('s02', True:bool), ('s03', False:bool), ('s04', True:bool), ('s05', False:bool), ('s06', None:bool), ('s07', None:bool), ('s08', None:bool), ('s09', None:bool), ('s10', None:bool), ('s11', None:bool), ('s12', False:bool), ('s13', True:bool), ('s14', False:bool), ('s15', False:bool), ('s16', True:bool), ('s17', True:bool), ('s18', True:bool), ('s19', False:bool), ('s20', True:bool), ('s21', False:bool), ('s22', True:bool), ('s23', False:bool), ('s24', False:bool), ('s25', True:bool), ('s26', True:bool), ('s27', False:bool), ('s28', True:bool), ('s29', False:bool), ('s30', 8:numeric), ('s31', 'alphabet':string), ('s32', 'SOUP':string), ('s33', True:bool), ('s34', False:bool), ('s35', False:bool), ('s36', True:bool), ('s37', 3.0:numeric), ('s38', n_rows == None:unknown), ('s39', n_rows <= None:unknown), ('s40', n_rows > None:unknown), ('s41', n_rows > None:unknown), ('s42', n_rows >= None:unknown), ('s43', None:unknown + n_rows), ('s44', n_rows - None:unknown), ('s45', 
None:unknown * n_rows), ('s46', n_rows / None:unknown), ('s47', LIKE(DEFAULT_TO(max_sbCustName, '':string), '%r%':string))], orderings=[]) + AGGREGATE(keys={}, aggregations={'max_sbCustName': MAX(sbCustName), 'n_rows': COUNT()}) + SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) diff --git a/tests/test_plan_refsols/simplification_3.txt b/tests/test_plan_refsols/simplification_3.txt index 8078734b7..9c1130483 100644 --- a/tests/test_plan_refsols/simplification_3.txt +++ b/tests/test_plan_refsols/simplification_3.txt @@ -1,3 +1,4 @@ -ROOT(columns=[('s00', True:bool), ('s01', True:bool), ('s02', False:bool), ('s03', False:bool), ('s04', False:bool), ('s05', False:bool), ('s06', 3:numeric <= n_rows), ('s07', False:bool), ('s08', n_rows <= 6:numeric), ('s09', False:bool)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.sbCustomer, columns={}) +ROOT(columns=[('s00', True:bool), ('s01', True:bool), ('s02', False:bool), ('s03', False:bool), ('s04', False:bool), ('s05', False:bool), ('s06', 3:numeric <= n_rows), ('s07', False:bool), ('s08', n_rows <= 6:numeric), ('s09', False:bool), ('s10', 91:numeric), ('s11', 42:numeric * 0:numeric), ('s12', 42:numeric + 8:numeric), ('s13', 35:numeric), ('s14', 25.0:numeric), ('s15', ABS(n_rows * -0.75:numeric)), ('s16', 10:numeric), ('s17', n_rows), ('s18', n_rows), ('s19', False:bool), ('s20', True:bool), ('s21', False:bool), ('s22', True:bool), ('s23', False:bool), ('s24', True:bool), ('s25', agg_1), ('s26', median_expr_13), ('s27', min_rank), ('s28', max_rank), ('s29', anything_rsum1), ('s30', ROUND(sum_rsum2, 2:numeric)), ('s31', anything_ravg1), ('s32', ROUND(sum_ravg2, 2:numeric)), ('s33', anything_rcnt1), ('s34', ROUND(sum_rcnt2, 2:numeric)), ('s35', anything_rsiz1), ('s36', ROUND(sum_rsiz2, 2:numeric))], orderings=[]) + AGGREGATE(keys={}, aggregations={'agg_1': QUANTILE(ABS(INTEGER(sbCustPostalCode)), 0.25:numeric), 'anything_ravg1': ANYTHING(ravg1), 'anything_rcnt1': 
ANYTHING(rcnt1), 'anything_rsiz1': ANYTHING(rsiz1), 'anything_rsum1': ANYTHING(rsum1), 'max_rank': MAX(rank), 'median_expr_13': MEDIAN(ABS(INTEGER(sbCustPostalCode))), 'min_rank': MIN(rank), 'n_rows': COUNT(), 'sum_ravg2': SUM(ravg2), 'sum_rcnt2': SUM(rcnt2), 'sum_rsiz2': SUM(rsiz2), 'sum_rsum2': SUM(rsum2)}) + PROJECT(columns={'rank': RANKING(args=[], partition=[], order=[(sbCustName):asc_last]), 'ravg1': RELAVG(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[]), 'ravg2': DEFAULT_TO(RELAVG(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[(sbCustName):asc_last], frame=(None, -1)), 0.1:numeric), 'rcnt1': RELCOUNT(args=[INTEGER(sbCustPostalCode)], partition=[], order=[]), 'rcnt2': DEFAULT_TO(RELCOUNT(args=[INTEGER(sbCustPostalCode)], partition=[], order=[(sbCustName):asc_last], cumulative=True), 0.1:numeric), 'rsiz1': RELSIZE(args=[], partition=[], order=[]), 'rsiz2': DEFAULT_TO(RELSIZE(args=[], partition=[], order=[(sbCustName):asc_last], frame=(1, None)), 0.1:numeric), 'rsum1': RELSUM(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[]), 'rsum2': DEFAULT_TO(RELSUM(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[(sbCustName):asc_last], cumulative=True), 0.1:numeric), 'sbCustPostalCode': sbCustPostalCode}) + SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName, 'sbCustPostalCode': sbCustPostalCode}) diff --git a/tests/test_sql_refsols/simplification_2_ansi.sql b/tests/test_sql_refsols/simplification_2_ansi.sql index 237dabec3..963c3a426 100644 --- a/tests/test_sql_refsols/simplification_2_ansi.sql +++ b/tests/test_sql_refsols/simplification_2_ansi.sql @@ -36,6 +36,15 @@ SELECT FALSE AS s34, FALSE AS s35, TRUE AS s36, - 3.0 AS s37 -FROM (VALUES - (NULL)) AS _q_0(_col_0) + 3.0 AS s37, + NULL AS s38, + NULL AS s39, + NULL AS s40, + NULL AS s41, + NULL AS s42, + NULL AS s43, + NULL AS s44, + NULL AS s45, + NULL AS s46, + 
COALESCE(MAX(sbcustname), '') LIKE '%r%' AS s47 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_2_sqlite.sql b/tests/test_sql_refsols/simplification_2_sqlite.sql index b5d5d71f5..963c3a426 100644 --- a/tests/test_sql_refsols/simplification_2_sqlite.sql +++ b/tests/test_sql_refsols/simplification_2_sqlite.sql @@ -36,6 +36,15 @@ SELECT FALSE AS s34, FALSE AS s35, TRUE AS s36, - 3.0 AS s37 -FROM (VALUES - (NULL)) AS _q_0 + 3.0 AS s37, + NULL AS s38, + NULL AS s39, + NULL AS s40, + NULL AS s41, + NULL AS s42, + NULL AS s43, + NULL AS s44, + NULL AS s45, + NULL AS s46, + COALESCE(MAX(sbcustname), '') LIKE '%r%' AS s47 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_3_ansi.sql b/tests/test_sql_refsols/simplification_3_ansi.sql index 3b49cd41b..c1f6211b7 100644 --- a/tests/test_sql_refsols/simplification_3_ansi.sql +++ b/tests/test_sql_refsols/simplification_3_ansi.sql @@ -1,3 +1,29 @@ +WITH _t1 AS ( + SELECT + ROW_NUMBER() OVER (ORDER BY sbcustname NULLS LAST) AS rank, + AVG(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER () AS ravg1, + COALESCE( + AVG(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER (ORDER BY sbcustname NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), + 0.1 + ) AS ravg2, + COUNT(CAST(sbcustpostalcode AS BIGINT)) OVER () AS rcnt1, + COALESCE( + COUNT(CAST(sbcustpostalcode AS BIGINT)) OVER (ORDER BY sbcustname NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rcnt2, + COUNT(*) OVER () AS rsiz1, + COALESCE( + COUNT(*) OVER (ORDER BY sbcustname NULLS LAST ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING), + 0.1 + ) AS rsiz2, + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER () AS rsum1, + COALESCE( + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER (ORDER BY sbcustname NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rsum2, + sbcustpostalcode + FROM main.sbcustomer +) SELECT TRUE AS s00, TRUE AS s01, @@ 
-8,5 +34,33 @@ SELECT COUNT(*) >= 3 AS s06, FALSE AS s07, COUNT(*) <= 6 AS s08, - FALSE AS s09 -FROM main.sbcustomer + FALSE AS s09, + 91 AS s10, + 0 AS s11, + 50 AS s12, + 35 AS s13, + 25.0 AS s14, + ABS(COUNT(*) * -0.75) AS s15, + 10 AS s16, + COUNT(*) AS s17, + COUNT(*) AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + TRUE AS s24, + PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY + ABS(CAST(sbcustpostalcode AS BIGINT)) NULLS LAST) AS s25, + MEDIAN(ABS(CAST(sbcustpostalcode AS BIGINT))) AS s26, + MIN(rank) AS s27, + MAX(rank) AS s28, + ANY_VALUE(rsum1) AS s29, + ROUND(SUM(rsum2), 2) AS s30, + ANY_VALUE(ravg1) AS s31, + ROUND(SUM(ravg2), 2) AS s32, + ANY_VALUE(rcnt1) AS s33, + ROUND(SUM(rcnt2), 2) AS s34, + ANY_VALUE(rsiz1) AS s35, + ROUND(SUM(rsiz2), 2) AS s36 +FROM _t1 diff --git a/tests/test_sql_refsols/simplification_3_sqlite.sql b/tests/test_sql_refsols/simplification_3_sqlite.sql index 3b49cd41b..a31640d1b 100644 --- a/tests/test_sql_refsols/simplification_3_sqlite.sql +++ b/tests/test_sql_refsols/simplification_3_sqlite.sql @@ -1,3 +1,59 @@ +WITH _t2 AS ( + SELECT + ABS(CAST(sbcustpostalcode AS INTEGER)) AS expr_13, + ROW_NUMBER() OVER (ORDER BY sbcustname) AS rank, + AVG(ABS(COALESCE(CAST(sbcustpostalcode AS INTEGER), 0))) OVER () AS ravg1, + COALESCE( + AVG(ABS(COALESCE(CAST(sbcustpostalcode AS INTEGER), 0))) OVER (ORDER BY sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), + 0.1 + ) AS ravg2, + COUNT(CAST(sbcustpostalcode AS INTEGER)) OVER () AS rcnt1, + COALESCE( + COUNT(CAST(sbcustpostalcode AS INTEGER)) OVER (ORDER BY sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rcnt2, + COUNT(*) OVER () AS rsiz1, + COALESCE( + COUNT(*) OVER (ORDER BY sbcustname ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING), + 0.1 + ) AS rsiz2, + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS INTEGER), 0))) OVER () AS rsum1, + COALESCE( + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS INTEGER), 0))) OVER (ORDER BY 
sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rsum2 + FROM main.sbcustomer +), _t1 AS ( + SELECT + CASE + WHEN CAST(0.75 * COUNT(expr_13) OVER () AS INTEGER) < ROW_NUMBER() OVER (ORDER BY expr_13 DESC) + THEN expr_13 + ELSE NULL + END AS expr_15, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (ORDER BY expr_13 DESC) - 1.0 + ) - ( + CAST(( + COUNT(expr_13) OVER () - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN expr_13 + ELSE NULL + END AS expr_16, + rank, + ravg1, + ravg2, + rcnt1, + rcnt2, + rsiz1, + rsiz2, + rsum1, + rsum2 + FROM _t2 +) SELECT TRUE AS s00, TRUE AS s01, @@ -8,5 +64,32 @@ SELECT COUNT(*) >= 3 AS s06, FALSE AS s07, COUNT(*) <= 6 AS s08, - FALSE AS s09 -FROM main.sbcustomer + FALSE AS s09, + 91 AS s10, + 0 AS s11, + 50 AS s12, + 35 AS s13, + 25.0 AS s14, + ABS(COUNT(*) * -0.75) AS s15, + 10 AS s16, + COUNT(*) AS s17, + COUNT(*) AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + TRUE AS s24, + MAX(expr_15) AS s25, + AVG(expr_16) AS s26, + MIN(rank) AS s27, + MAX(rank) AS s28, + MAX(rsum1) AS s29, + ROUND(SUM(rsum2), 2) AS s30, + MAX(ravg1) AS s31, + ROUND(SUM(ravg2), 2) AS s32, + MAX(rcnt1) AS s33, + ROUND(SUM(rcnt2), 2) AS s34, + MAX(rsiz1) AS s35, + ROUND(SUM(rsiz2), 2) AS s36 +FROM _t1 From 02c24bd54e79b63ef8acc9b265deb7e2938968aa Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 31 Jul 2025 00:41:53 -0400 Subject: [PATCH 076/143] Revisions --- .../conversion/relational_simplification.py | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index a1e20c7df..af2d88137 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -1,5 +1,9 @@ """ -Logic used to simplify relational expressions in a relational node. +Logic used to simplify relational expressions in a relational node. 
A visitor +is used on the relational nodes to first simplify the child subtrees, then a +relational shuttle is run on the expressions of the current node to simplify +them, using the input predicates from the child nodes, and also infer the +predicates of the simplified expressions. """ __all__ = ["simplify_expressions"] @@ -91,7 +95,7 @@ def union(predicates: list["PredicateSet"]) -> "PredicateSet": Computes the union of a list of predicate sets. """ result: PredicateSet = PredicateSet() - for pred in predicates[1:]: + for pred in predicates: result = result | pred return result @@ -304,7 +308,7 @@ def simplify_function_call( union_set: PredicateSet = PredicateSet.union(arg_predicates) intersect_set: PredicateSet = PredicateSet.intersect(arg_predicates) - # If the call has null propagating rules, all of hte arguments are + # If the call has null propagating rules, all of the arguments are # non-null, the output is guaranteed to be non-null. if expr.op in NULL_PROPAGATING_OPS: if intersect_set.not_null: @@ -411,7 +415,7 @@ def simplify_function_call( if PredicateSet.union(new_predicates).not_null: output_predicates.not_null = True - # ABS(x) -> x if x is positive or non-negative. At hte very least, we + # ABS(x) -> x if x is positive or non-negative. At the very least, we # know it is always non-negative. case pydop.ABS: if arg_predicates[0].not_negative or arg_predicates[0].positive: @@ -434,7 +438,9 @@ def simplify_function_call( # LOWER, UPPER, STARTSWITH, ENDSWITH, and CONTAINS can be constant # folded if the inputs are string literals. The boolean-returning - # operators are always non-negative. + # operators are always non-negative. Most of cases do not set + # predicates because there are no predicates to infer, beyond those + # already accounted for with NULL_PROPAGATING_OPS. 
case pydop.LOWER: if isinstance(expr.inputs[0], LiteralExpression) and isinstance( expr.inputs[0].value, str @@ -567,7 +573,7 @@ def simplify_function_call( for arg in expr.inputs ): output_expr = LiteralExpression(False, expr.data_type) - if all( + elif all( isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] for arg in expr.inputs @@ -584,7 +590,7 @@ def simplify_function_call( for arg in expr.inputs ): output_expr = LiteralExpression(True, expr.data_type) - if all( + elif all( isinstance(arg, LiteralExpression) and arg.value in [0, False, None] for arg in expr.inputs ): @@ -603,7 +609,6 @@ def simplify_function_call( ) output_predicates.positive = not bool(expr.inputs[0].value) output_predicates.not_negative = True - pass case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: match (expr.inputs[0], expr.op, expr.inputs[1]): @@ -669,6 +674,7 @@ def simplify_function_call( output_expr = LiteralExpression(x >= y, expr.data_type) # type: ignore case _: + # All other cases remain non-simplified. pass output_predicates.not_negative = True @@ -727,6 +733,9 @@ def simplify_function_call( output_predicates |= arg_predicates[0] & PredicateSet( not_null=True, not_negative=True ) + case _: + # All other operators remain non-simplified. + pass self.stack.append(output_predicates) return output_expr @@ -793,6 +802,10 @@ def simplify_window_call( output_predicates.positive = True output_predicates.not_negative = True + case _: + # All other operators remain non-simplified. 
+ pass + self.stack.append(output_predicates) return output_expr From 2c65773518a180471fe4b0c80461cd51dc579a55 Mon Sep 17 00:00:00 2001 From: knassre-bodo <105652923+knassre-bodo@users.noreply.github.com> Date: Thu, 31 Jul 2025 00:42:29 -0400 Subject: [PATCH 077/143] Apply suggestions from code review Co-authored-by: Hadia Ahmed --- pydough/conversion/relational_simplification.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index af2d88137..3ca3a19cb 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -211,7 +211,7 @@ def input_predicates(self, value: dict[RelationalExpression, PredicateSet]) -> N @property def no_group_aggregate(self) -> bool: """ - Returns whether the shuttle currently a handling no-group-aggregate. + Returns whether the shuttle is currently handling a no-group-aggregate. """ return self._no_group_aggregate @@ -321,9 +321,9 @@ def simplify_function_call( output_predicates.not_null = True output_predicates.not_negative = True - # The output if COUNT(*) is positive if unless doing a + # The output of COUNT(*) is positive unless doing a # no-groupby aggregation. Same goes for calling COUNT or - # NDISTINCT ona non-null column. + # NDISTINCT on a non-null column. if not no_group_aggregate: if len(expr.inputs) == 0 or arg_predicates[0].not_null: output_predicates.positive = True @@ -339,7 +339,7 @@ def simplify_function_call( output_predicates.positive = True output_expr = CallExpression(pydop.COUNT, expr.data_type, []) - # All of these operators are non-null aor non-negative if their + # All of these operators are non-null or non-negative if their # first argument is. 
case ( pydop.SUM From cc12363480defa6441f39e8e21669ba47361e47f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 31 Jul 2025 00:43:36 -0400 Subject: [PATCH 078/143] edit --- pydough/conversion/relational_simplification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index af2d88137..5d7ce9aef 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -231,7 +231,7 @@ def visit_literal_expression( output_predicates: PredicateSet = PredicateSet() if literal_expression.value is not None: output_predicates.not_null = True - if isinstance(literal_expression.value, (int, float)): + if isinstance(literal_expression.value, (int, float, bool)): if literal_expression.value >= 0: output_predicates.not_negative = True if literal_expression.value > 0: From 6ec13f1df0266596e74a34c60f6305e1e7f2c6e9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 1 Aug 2025 10:45:47 -0400 Subject: [PATCH 079/143] [RUN CI] --- tests/test_sql_refsols/defog_broker_adv8_ansi.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sql_refsols/defog_broker_adv8_ansi.sql b/tests/test_sql_refsols/defog_broker_adv8_ansi.sql index d5d15e56c..c130ba30f 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_ansi.sql @@ -7,4 +7,4 @@ JOIN main.sbcustomer AS sbcustomer AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE sbtransaction.sbtxdatetime < DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()) - AND sbtransaction.sbtxdatetime >= DATE_ADD(DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()), -1, 'WEEK') + AND sbtransaction.sbtxdatetime >= DATE_ADD(DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()), -7, 'DAY') From 9344c9a13f46f61bd6b60801cdcd44468b4f6049 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 1 Aug 2025 10:58:51 -0400 Subject: [PATCH 080/143] Fixing SQL 
test [RUN CI] --- tests/test_sql_refsols/defog_broker_adv8_ansi.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sql_refsols/defog_broker_adv8_ansi.sql b/tests/test_sql_refsols/defog_broker_adv8_ansi.sql index c130ba30f..d5d15e56c 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_ansi.sql @@ -7,4 +7,4 @@ JOIN main.sbcustomer AS sbcustomer AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE sbtransaction.sbtxdatetime < DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()) - AND sbtransaction.sbtxdatetime >= DATE_ADD(DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()), -7, 'DAY') + AND sbtransaction.sbtxdatetime >= DATE_ADD(DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()), -1, 'WEEK') From 59850bd72af909f11ed55074431afb34eda402b5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 1 Aug 2025 11:10:12 -0400 Subject: [PATCH 081/143] [RUN CI] From f460cdaa0d6eebd96eaf4ea81a9f78e249ba28ee Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 1 Aug 2025 17:00:41 -0400 Subject: [PATCH 082/143] Revision --- tests/test_plan_refsols/smoke_c.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_plan_refsols/smoke_c.txt b/tests/test_plan_refsols/smoke_c.txt index 33bd737f0..c9307612d 100644 --- a/tests/test_plan_refsols/smoke_c.txt +++ b/tests/test_plan_refsols/smoke_c.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('a', n_rows), ('b', DEFAULT_TO(sum_expr_18, 0:numeric)), ('c', DEFAULT_TO(sum_expr_25, 0:numeric)), ('d', ndistinct_c_mktsegment), ('e', ROUND(avg_expr_26, 4:numeric)), ('f', min_c_acctbal), ('g', max_c_acctbal), ('h', anything_expr_27), ('i', count_expr_21), ('j', CEIL(population_variance_expr_21)), ('k', ROUND(sample_variance_expr_19, 4:numeric)), ('l', FLOOR(population_std_expr_19)), ('m', ROUND(sample_std_expr_21, 4:numeric)), ('n', ROUND(avg_expr_22, 2:numeric)), ('o', sum_expr_23), ('p', sum_expr_24), ('q', agg_16), ('r', median_c_acctbal)], orderings=[]) - AGGREGATE(keys={}, 
aggregations={'agg_16': QUANTILE(c_acctbal, 0.2:numeric), 'anything_expr_27': ANYTHING(SLICE(c_name, None:unknown, 1:numeric, None:unknown)), 'avg_expr_22': AVG(DEFAULT_TO(KEEP_IF(c_acctbal, c_acctbal > 0:numeric), 0:numeric)), 'avg_expr_26': AVG(ABS(c_acctbal)), 'count_expr_21': COUNT(KEEP_IF(c_acctbal, c_acctbal > 0:numeric)), 'max_c_acctbal': MAX(c_acctbal), 'median_c_acctbal': MEDIAN(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'ndistinct_c_mktsegment': NDISTINCT(c_mktsegment), 'population_std_expr_19': POPULATION_STD(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'population_variance_expr_21': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_acctbal > 0:numeric)), 'sample_std_expr_21': SAMPLE_STD(KEEP_IF(c_acctbal, c_acctbal > 0:numeric)), 'sample_variance_expr_19': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'sum_expr_18': SUM(FLOOR(c_acctbal)), 'sum_expr_23': SUM(PRESENT(KEEP_IF(c_acctbal, c_acctbal > 1000:numeric))), 'sum_expr_24': SUM(ABSENT(KEEP_IF(c_acctbal, c_acctbal > 1000:numeric))), 'sum_expr_25': SUM(CEIL(c_acctbal))}) +ROOT(columns=[('a', n_rows), ('b', DEFAULT_TO(sum_expr_18, 0:numeric)), ('c', DEFAULT_TO(sum_expr_25, 0:numeric)), ('d', ndistinct_c_mktsegment), ('e', ROUND(avg_expr_26, 4:numeric)), ('f', min_c_acctbal), ('g', max_c_acctbal), ('h', anything_expr_27), ('i', count_expr_21), ('j', CEIL(population_var_expr_21)), ('k', ROUND(sample_var_expr_19, 4:numeric)), ('l', FLOOR(population_std_expr_19)), ('m', ROUND(sample_std_expr_21, 4:numeric)), ('n', ROUND(avg_expr_22, 2:numeric)), ('o', sum_expr_23), ('p', sum_expr_24), ('q', agg_16), ('r', median_c_acctbal)], orderings=[]) + AGGREGATE(keys={}, aggregations={'agg_16': QUANTILE(c_acctbal, 0.2:numeric), 'anything_expr_27': ANYTHING(SLICE(c_name, None:unknown, 1:numeric, None:unknown)), 'avg_expr_22': AVG(DEFAULT_TO(KEEP_IF(c_acctbal, c_acctbal > 0:numeric), 0:numeric)), 'avg_expr_26': AVG(ABS(c_acctbal)), 'count_expr_21': COUNT(KEEP_IF(c_acctbal, c_acctbal > 
0:numeric)), 'max_c_acctbal': MAX(c_acctbal), 'median_c_acctbal': MEDIAN(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'ndistinct_c_mktsegment': NDISTINCT(c_mktsegment), 'population_std_expr_19': POPULATION_STD(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'population_var_expr_21': POPULATION_VAR(KEEP_IF(c_acctbal, c_acctbal > 0:numeric)), 'sample_std_expr_21': SAMPLE_STD(KEEP_IF(c_acctbal, c_acctbal > 0:numeric)), 'sample_var_expr_19': SAMPLE_VAR(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'sum_expr_18': SUM(FLOOR(c_acctbal)), 'sum_expr_23': SUM(PRESENT(KEEP_IF(c_acctbal, c_acctbal > 1000:numeric))), 'sum_expr_24': SUM(ABSENT(KEEP_IF(c_acctbal, c_acctbal > 1000:numeric))), 'sum_expr_25': SUM(CEIL(c_acctbal))}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) From 0fa39c781de76f374d59910f44d86bbf5013c15c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 7 Aug 2025 09:42:33 -0400 Subject: [PATCH 083/143] Removing dead comment --- tests/test_pipeline_tpch_custom.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 1680707c1..9bd5b3bf7 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -3265,8 +3265,6 @@ def test_pipeline_e2e_tpch_custom( ), id="bad_cross_6", ), - # TODO: fix the error handling here to give a proper error message - # (currently fails in hybrid due to an assertion) pytest.param( bad_cross_7, None, From 4d899b824d751e396e911917d12b3574f4748d64 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 18 Aug 2025 10:28:25 -0400 Subject: [PATCH 084/143] Adding more comments/docstrings --- pydough/unqualified/unqualified_node.py | 39 +++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/pydough/unqualified/unqualified_node.py b/pydough/unqualified/unqualified_node.py index 4dc1f57cc..40d8ae6ea 100644 --- 
a/pydough/unqualified/unqualified_node.py +++ b/pydough/unqualified/unqualified_node.py @@ -893,7 +893,19 @@ def call_function_operator( **kwargs, ) -> UnqualifiedNode: """ - TODO + Creates an invocation of a PyDough (non-window) function operator on the + provided operands and keyword arguments. + + Args: + `operator`: the function operator being called. + `operands`: the list of unqualified nodes being passed as arguments. + `kwargs`: the keyword arguments being passed to the function. These are + used for operators that branch on a keyword, such as variance and + standard deviation which have different sub-operators for population + versus sample. + + Returns: + The unqualified node representing the function call. """ # Check if this is a keyword branching operator @@ -923,25 +935,48 @@ def call_window_operator( operator: pydop.ExpressionWindowOperator, operands: list[UnqualifiedNode], **kwargs ) -> UnqualifiedNode: """ - TODO + Creates an invocation of a PyDough window function operator on the + provided operands and keyword arguments. + + Args: + `operator`: the window function operator being called. + `operands`: the list of unqualified nodes being passed as arguments. + `kwargs`: the keyword arguments being passed to the window function. + These may include `by`, `per`, `n_buckets`, `allow_ties`, `dense`, + `n`, etc. depending on the operator. + + Returns: + The unqualified node representing the window function call. 
""" match operator: case pydop.PERCENTILE: + # Percentile has an optional `n_buckets` argument, defaulting to 100 is_positive_int.verify(kwargs.get("n_buckets", 100), "`n_buckets` argument") case pydop.RANKING: + # Ranking has optional `allow_ties` and `dense` boolean arguments, + # both defaulting to False is_bool.verify(kwargs.get("allow_ties", False), "`allow_ties` argument") is_bool.verify(kwargs.get("dense", False), "`dense` argument") case pydop.PREV | pydop.NEXT: + # PREV/NEXT have an optional `n` argument, defaulting to 1, which + # could also be a positional argument. is_integer.verify(kwargs.get("n", 1), "`n` argument") if len(operands) > 1: is_integer.verify(operands[1], "`n` argument") + # Extract the `by` argument to the window function, if it has one, and + # verify that it is valid for to have one given the operator and other + # keyword arguments (e.g. cumulative, frame). by: Iterable[UnqualifiedNode] = get_by_arg(kwargs, operator) + + # Any window function can have an optional `per` argument saying which + # ancestor the window function is being computed with regards to. 
per: str | None = None if "per" in kwargs: per_arg = kwargs.pop("per") is_string.verify(per_arg, "`per` argument") per = per_arg + return UnqualifiedWindow( operator, operands, From e8fd112c854dd3990e06a170c900357bee773b45 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 18 Aug 2025 12:10:36 -0400 Subject: [PATCH 085/143] Initial implementation buggy WIP --- pydough/conversion/join_agg_transpose.py | 165 ++++++++++++++++++ pydough/conversion/relational_converter.py | 5 +- tests/test_plan_refsols/common_prefix_af.txt | 6 +- tests/test_plan_refsols/common_prefix_o.txt | 21 ++- tests/test_plan_refsols/correl_35.txt | 6 +- .../count_cust_supplier_nation_combos.txt | 18 +- .../multi_partition_access_2.txt | 34 ++-- .../multi_partition_access_3.txt | 10 +- .../multi_partition_access_5.txt | 8 +- ...hnograph_incident_rate_by_release_year.txt | 8 +- tests/test_sql_refsols/correl_35_sqlite.sql | 28 +-- .../defog_broker_basic4_ansi.sql | 47 ++--- .../defog_broker_basic4_sqlite.sql | 47 ++--- .../defog_dealership_gen4_ansi.sql | 32 ++-- .../defog_dealership_gen4_sqlite.sql | 40 ++--- ...aph_incident_rate_by_release_year_ansi.sql | 28 +-- ...h_incident_rate_by_release_year_sqlite.sql | 28 +-- 17 files changed, 350 insertions(+), 181 deletions(-) create mode 100644 pydough/conversion/join_agg_transpose.py diff --git a/pydough/conversion/join_agg_transpose.py b/pydough/conversion/join_agg_transpose.py new file mode 100644 index 000000000..ba54206a9 --- /dev/null +++ b/pydough/conversion/join_agg_transpose.py @@ -0,0 +1,165 @@ +""" """ + +__all__ = ["pull_joins_after_aggregates"] + + +import pydough.pydough_operators as pydop +from pydough.relational import ( + Aggregate, + CallExpression, + ColumnReference, + ColumnReferenceFinder, + Join, + JoinType, + RelationalExpression, + RelationalNode, + RelationalRoot, + RelationalShuttle, +) +from pydough.relational.rel_util import ( + add_input_name, +) + + +class JoinAggregateTransposeShuttle(RelationalShuttle): + """ + TODO + 
""" + + def __init__(self): + self.finder: ColumnReferenceFinder = ColumnReferenceFinder() + + def reset(self): + self.finder.reset() + + def visit_join(self, node: Join) -> RelationalNode: + if isinstance(node.inputs[0], Aggregate): + return self.generic_visit_inputs( + self.join_aggregate_transpose(node, node.inputs[0]) + ) + return super().visit_join(node) + + def join_aggregate_transpose( + self, join: Join, aggregate: Aggregate + ) -> RelationalNode: + """ + Transposes a Join above an Aggregate into an Aggregate above a Join, + when possible. + + Args: + `join`: the Join node above the Aggregate. + `aggregate`: the Aggregate node that is the left input to the Join. + + Returns: + The new RelationalNode tree with the Join and Aggregate transposed, or + the original Join if the transpose is not possible. + """ + # Verify that the join is an inner, left, or semi-join, and that the + # join cardinality is singular (unless the aggregations are not affected + # by a change in cardinality). + aggs_allow_plural: bool = all( + call.op in (pydop.MIN, pydop.MAX, pydop.ANYTHING, pydop.NDISTINCT) + for call in aggregate.aggregations.values() + ) + if not ( + join.join_type in (JoinType.INNER, JoinType.SEMI) + and (join.cardinality.singular or aggs_allow_plural) + ): + return join + + # Find all of the columns used in the join condition that come from the + # left-hand side of the join. + self.finder.reset() + join.condition.accept(self.finder) + lhs_condition_columns: set[ColumnReference] = { + col + for col in self.finder.get_column_references() + if col.input_name == join.default_input_aliases[0] + } + + # Verify that there is at least one left hand side condition column, + # and all of them are grouping keys in the aggregate. 
+ if len(lhs_condition_columns) == 0 or any( + col.name not in aggregate.keys for col in lhs_condition_columns + ): + return join + + new_join_columns: dict[str, RelationalExpression] = {} + new_key_columns: dict[str, RelationalExpression] = {} + new_aggregate_columns: dict[str, CallExpression] = {} + used_column_names: set[str] = set() + + for col_name, col_expr in join.columns.items(): + self.finder.reset() + col_expr.accept(self.finder) + if all( + expr.input_name == join.default_input_aliases[1] + for expr in self.finder.get_column_references() + ): + new_join_columns[col_name] = col_expr + new_aggregate_columns[col_name] = CallExpression( + pydop.ANYTHING, + col_expr.data_type, + [ColumnReference(col_name, col_expr.data_type)], + ) + used_column_names.add(col_name) + elif not ( + isinstance(col_expr, ColumnReference) + and col_expr.input_name == join.default_input_aliases[0] + ): + return join + + for key_name, key_expr in aggregate.keys.items(): + new_join_columns[key_name] = add_input_name( + key_expr, join.default_input_aliases[0] + ) + if key_name in used_column_names: + assert False + new_key_columns[key_name] = ColumnReference(key_name, col_expr.data_type) + used_column_names.add(key_name) + + for agg_name, agg_expr in aggregate.aggregations.items(): + for input_expr in agg_expr.inputs: + if not isinstance(input_expr, ColumnReference): + assert False + if input_expr.name in new_join_columns: + assert False + new_join_columns[input_expr.name] = add_input_name( + input_expr, join.default_input_aliases[0] + ) + if agg_name in used_column_names: + assert False + new_aggregate_columns[agg_name] = agg_expr + used_column_names.add(agg_name) + + new_join: Join = Join( + inputs=[aggregate.inputs[0], join.inputs[1]], + condition=join.condition, + columns=new_join_columns, + join_type=join.join_type, + cardinality=join.cardinality, + ) + + new_aggregate = Aggregate( + input=new_join, keys=new_key_columns, aggregations=new_aggregate_columns + ) + + # print() + # 
print(join.to_tree_string()) + # print(lhs_condition_columns) + # print(new_join_columns) + # print(new_key_columns) + # print(new_aggregate_columns) + # print(new_aggregate.to_tree_string()) + # breakpoint() + # return join + + return new_aggregate + + +def pull_joins_after_aggregates(node: RelationalRoot) -> RelationalNode: + """ + TODO + """ + shuttle: JoinAggregateTransposeShuttle = JoinAggregateTransposeShuttle() + return node.accept_shuttle(shuttle) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 4b35b0cb4..692cbf948 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -84,6 +84,7 @@ ) from .hybrid_translator import HybridTranslator from .hybrid_tree import HybridTree +from .join_agg_transpose import pull_joins_after_aggregates from .merge_projects import merge_projects from .projection_pullup import pullup_projections from .relational_simplification import simplify_expressions @@ -1483,7 +1484,8 @@ def optimize_relational_tree( # A: projection pullup # B: expression simplification # C: filter pushdown - # D: column pruning + # D: join-aggregate transpose + # E: column pruning # This is done because pullup will create more opportunities for expression # simplification, which will allow more filters to be pushed further down, # and the combination of those together will create more opportunities for @@ -1493,6 +1495,7 @@ def optimize_relational_tree( root = confirm_root(pullup_projections(root)) simplify_expressions(root, additional_shuttles) root = confirm_root(push_filters(root)) + root = confirm_root(pull_joins_after_aggregates(root)) root = ColumnPruner().prune_unused_columns(root) # Step 9: re-run projection merging, without pushing into joins. 
This diff --git a/tests/test_plan_refsols/common_prefix_af.txt b/tests/test_plan_refsols/common_prefix_af.txt index c35fff43b..8b72513f1 100644 --- a/tests/test_plan_refsols/common_prefix_af.txt +++ b/tests/test_plan_refsols/common_prefix_af.txt @@ -8,8 +8,8 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name' AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_name': MAX(c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'n_rows': t0.n_rows, 'o_custkey': t0.o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'c_name': ANYTHING(c_name), 'n_rows': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) FILTER(condition=ISIN(o_orderkey, [1070368, 1347104, 1472135, 2351457]:array[unknown]), columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 42ae08339..20f4702e6 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -5,17 +5,16 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', D FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) 
== 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, 
columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': ANYTHING(s_acctbal), 'sum_n_rows': COUNT(), 'sum_sum_agg_5': SUM(agg_5), 'sum_sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) diff --git a/tests/test_plan_refsols/correl_35.txt b/tests/test_plan_refsols/correl_35.txt index ec4994e6d..24073e17b 100644 --- 
a/tests/test_plan_refsols/correl_35.txt +++ b/tests/test_plan_refsols/correl_35.txt @@ -12,8 +12,8 @@ ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}) AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) - AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT(), 'p_type': ANYTHING(p_type)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t0.l_partkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) @@ -21,4 +21,4 @@ 
ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=QUARTER(l_shipdate) == 1:numeric & YEAR(l_shipdate) == 1997:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index f8c87d703..b243e7f6e 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -1,11 +1,11 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', sum_sum_sum_sum_agg_0), ('total_value', DEFAULT_TO(sum_sum_sum_sum_sum_l_extendedprice, 0:numeric))], orderings=[]) AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year': year}, aggregations={'sum_sum_sum_sum_agg_0': SUM(sum_sum_sum_agg_0), 'sum_sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'sum_sum_sum_agg_0': t0.sum_sum_sum_agg_0, 'sum_sum_sum_sum_l_extendedprice': t0.sum_sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year': t0.year}) - AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year': year}, aggregations={'sum_sum_sum_agg_0': SUM(sum_sum_agg_0), 'sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 
's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': COUNT(), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) + AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year': year}, aggregations={'sum_sum_sum_agg_0': SUM(sum_sum_agg_0), 'sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_l_extendedprice), 'supplier_nation': ANYTHING(supplier_nation)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t0.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year': t0.year}) + AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'s_nationkey': ANYTHING(s_nationkey), 'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, 
aggregations={'ps_suppkey': ANYTHING(ps_suppkey), 'sum_agg_0': COUNT(), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_suppkey': t1.ps_suppkey, 'sum_l_extendedprice': t0.sum_l_extendedprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) @@ -14,6 +14,6 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index ea1267de4..4d9bd41a7 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -3,26 +3,26 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares_1, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': ANYTHING(cust_avg_shares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': AVG(sbTxShares)}) + JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': 
t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares_1': ANYTHING(cust_tick_avg_shares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & 
t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'cus_tick_typ_avg_shares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index b58741ac6..9d772a653 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ 
b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,14 +1,14 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'type_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) - AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) + AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'sbDpClose': ANYTHING(sbDpClose), 'sbTickerType': ANYTHING(sbTickerType)}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t0.sbDpTickerId, 'sbTickerType': t1.sbTickerType}) JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpTickerId': t1.sbDpTickerId}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId}) SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) - SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) + 
SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'ticker_high_price': MAX(sbDpClose)}) JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 5aea3ab12..4d92ec389 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -2,11 +2,11 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) JOIN(condition=t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans < 0.2:numeric & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + 
AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'n_ticker_type_trans': ANYTHING(n_ticker_type_trans), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans > 0.8:numeric, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index 052d1893e..5f01eb40f 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -1,10 +1,10 @@ ROOT(columns=[('year', release_year), ('ir', ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric))], orderings=[(release_year):asc_first]) JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'release_year': 
t0.release_year, 'sum_n_rows': t0.sum_n_rows}) - AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'sum_n_rows': SUM(n_rows_1)}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows_1': COUNT(), 'pr_release': ANYTHING(pr_release)}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'pr_release': t1.pr_release}) SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) diff --git a/tests/test_sql_refsols/correl_35_sqlite.sql b/tests/test_sql_refsols/correl_35_sqlite.sql index bda581395..5419a1b54 100644 --- a/tests/test_sql_refsols/correl_35_sqlite.sql +++ b/tests/test_sql_refsols/correl_35_sqlite.sql @@ -3,12 +3,12 @@ WITH _s1 AS ( p_partkey, p_type FROM tpch.part -), _s10 AS ( +), _t4 AS ( SELECT COUNT(*) AS n_rows, + MAX(_s11.p_type) AS p_type, customer.c_custkey, customer.c_nationkey, - lineitem.l_partkey, orders.o_orderpriority FROM tpch.customer AS customer JOIN tpch.orders AS orders @@ -31,6 +31,8 @@ WITH _s1 AS ( END = 1 AND CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1997 AND lineitem.l_orderkey = 
orders.o_orderkey + JOIN _s1 AS _s11 + ON _s11.p_partkey = lineitem.l_partkey GROUP BY customer.c_custkey, customer.c_nationkey, @@ -38,19 +40,17 @@ WITH _s1 AS ( orders.o_orderpriority ), _t3 AS ( SELECT - SUM(_s10.n_rows) AS sum_n_rows, - _s10.c_custkey, - _s10.c_nationkey, - _s10.o_orderpriority, - _s11.p_type - FROM _s10 AS _s10 - JOIN _s1 AS _s11 - ON _s10.l_partkey = _s11.p_partkey + SUM(n_rows) AS sum_n_rows, + c_custkey, + c_nationkey, + o_orderpriority, + p_type + FROM _t4 GROUP BY - _s10.c_custkey, - _s10.c_nationkey, - _s10.o_orderpriority, - _s11.p_type + c_custkey, + c_nationkey, + o_orderpriority, + p_type ) SELECT COUNT(*) AS n diff --git a/tests/test_sql_refsols/defog_broker_basic4_ansi.sql b/tests/test_sql_refsols/defog_broker_basic4_ansi.sql index 1c7306b78..c3d74d72d 100644 --- a/tests/test_sql_refsols/defog_broker_basic4_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_basic4_ansi.sql @@ -1,34 +1,35 @@ -WITH _s0 AS ( +WITH _t2 AS ( SELECT COUNT(*) AS num_transactions, - sbtxcustid, - sbtxtickerid - FROM main.sbtransaction + ANY_VALUE(sbcustomer.sbcuststate) AS sbcuststate, + ANY_VALUE(sbticker.sbtickertype) AS sbtickertype, + sbtransaction.sbtxcustid + FROM main.sbtransaction AS sbtransaction + JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid + JOIN main.sbcustomer AS sbcustomer + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid GROUP BY - sbtxcustid, - sbtxtickerid -), _s2 AS ( + sbtransaction.sbtxcustid, + sbtransaction.sbtxtickerid +), _t1 AS ( SELECT - SUM(_s0.num_transactions) AS num_transactions, - sbticker.sbtickertype, - _s0.sbtxcustid - FROM _s0 AS _s0 - JOIN main.sbticker AS sbticker - ON _s0.sbtxtickerid = sbticker.sbtickerid + SUM(num_transactions) AS num_transactions, + ANY_VALUE(sbcuststate) AS sbcuststate, + sbtickertype + FROM _t2 GROUP BY - sbticker.sbtickertype, - _s0.sbtxcustid + sbtickertype, + sbtxcustid ) SELECT - sbcustomer.sbcuststate AS state, - _s2.sbtickertype AS ticker_type, - 
SUM(_s2.num_transactions) AS num_transactions -FROM _s2 AS _s2 -JOIN main.sbcustomer AS sbcustomer - ON _s2.sbtxcustid = sbcustomer.sbcustid + sbcuststate AS state, + sbtickertype AS ticker_type, + SUM(num_transactions) AS num_transactions +FROM _t1 GROUP BY - sbcustomer.sbcuststate, - _s2.sbtickertype + sbcuststate, + sbtickertype ORDER BY num_transactions DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql b/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql index 1c7306b78..adf243d02 100644 --- a/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql @@ -1,34 +1,35 @@ -WITH _s0 AS ( +WITH _t2 AS ( SELECT COUNT(*) AS num_transactions, - sbtxcustid, - sbtxtickerid - FROM main.sbtransaction + MAX(sbcustomer.sbcuststate) AS sbcuststate, + MAX(sbticker.sbtickertype) AS sbtickertype, + sbtransaction.sbtxcustid + FROM main.sbtransaction AS sbtransaction + JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid + JOIN main.sbcustomer AS sbcustomer + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid GROUP BY - sbtxcustid, - sbtxtickerid -), _s2 AS ( + sbtransaction.sbtxcustid, + sbtransaction.sbtxtickerid +), _t1 AS ( SELECT - SUM(_s0.num_transactions) AS num_transactions, - sbticker.sbtickertype, - _s0.sbtxcustid - FROM _s0 AS _s0 - JOIN main.sbticker AS sbticker - ON _s0.sbtxtickerid = sbticker.sbtickerid + SUM(num_transactions) AS num_transactions, + MAX(sbcuststate) AS sbcuststate, + sbtickertype + FROM _t2 GROUP BY - sbticker.sbtickertype, - _s0.sbtxcustid + sbtickertype, + sbtxcustid ) SELECT - sbcustomer.sbcuststate AS state, - _s2.sbtickertype AS ticker_type, - SUM(_s2.num_transactions) AS num_transactions -FROM _s2 AS _s2 -JOIN main.sbcustomer AS sbcustomer - ON _s2.sbtxcustid = sbcustomer.sbcustid + sbcuststate AS state, + sbtickertype AS ticker_type, + SUM(num_transactions) AS num_transactions +FROM _t1 GROUP BY - sbcustomer.sbcuststate, - 
_s2.sbtickertype + sbcuststate, + sbtickertype ORDER BY num_transactions DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql index 6ac06680b..373ecd582 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql @@ -1,25 +1,25 @@ -WITH _s0 AS ( +WITH _t2 AS ( SELECT - DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, - SUM(sale_price) AS sum_sale_price, - customer_id - FROM main.sales + ANY_VALUE(customers.state) AS state, + SUM(sales.sale_price) AS sum_sale_price, + DATE_TRUNC('QUARTER', CAST(sales.sale_date AS TIMESTAMP)) AS quarter + FROM main.sales AS sales + JOIN main.customers AS customers + ON customers._id = sales.customer_id WHERE - EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 + EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 GROUP BY - DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)), - customer_id + sales.customer_id, + DATE_TRUNC('QUARTER', CAST(sales.sale_date AS TIMESTAMP)) ), _t1 AS ( SELECT - SUM(_s0.sum_sale_price) AS sum_sum_sale_price, - _s0.quarter, - customers.state - FROM _s0 AS _s0 - JOIN main.customers AS customers - ON _s0.customer_id = customers._id + SUM(sum_sale_price) AS sum_sum_sale_price, + quarter, + state + FROM _t2 GROUP BY - _s0.quarter, - customers.state + quarter, + state ) SELECT quarter, diff --git a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql index 96ad10d92..65ad5eed8 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql @@ -1,41 +1,41 @@ -WITH _s0 AS ( +WITH _t2 AS ( SELECT + MAX(customers.state) AS state, + SUM(sales.sale_price) AS sum_sale_price, DATE( - sale_date, + sales.sale_date, 'start of month', '-' || CAST(( ( - CAST(STRFTIME('%m', DATETIME(sale_date)) AS INTEGER) - 1 + 
CAST(STRFTIME('%m', DATETIME(sales.sale_date)) AS INTEGER) - 1 ) % 3 ) AS TEXT) || ' months' - ) AS quarter, - SUM(sale_price) AS sum_sale_price, - customer_id - FROM main.sales + ) AS quarter + FROM main.sales AS sales + JOIN main.customers AS customers + ON customers._id = sales.customer_id WHERE - CAST(STRFTIME('%Y', sale_date) AS INTEGER) = 2023 + CAST(STRFTIME('%Y', sales.sale_date) AS INTEGER) = 2023 GROUP BY + sales.customer_id, DATE( - sale_date, + sales.sale_date, 'start of month', '-' || CAST(( ( - CAST(STRFTIME('%m', DATETIME(sale_date)) AS INTEGER) - 1 + CAST(STRFTIME('%m', DATETIME(sales.sale_date)) AS INTEGER) - 1 ) % 3 ) AS TEXT) || ' months' - ), - customer_id + ) ), _t1 AS ( SELECT - SUM(_s0.sum_sale_price) AS sum_sum_sale_price, - _s0.quarter, - customers.state - FROM _s0 AS _s0 - JOIN main.customers AS customers - ON _s0.customer_id = customers._id + SUM(sum_sale_price) AS sum_sum_sale_price, + quarter, + state + FROM _t2 GROUP BY - _s0.quarter, - customers.state + quarter, + state ) SELECT quarter, diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql index c00e952eb..592b7dd15 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql @@ -1,24 +1,24 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows, - de_product_id - FROM main.devices - GROUP BY - de_product_id -), _s1 AS ( +WITH _s1 AS ( SELECT pr_id, pr_release FROM main.products -), _s6 AS ( +), _t1 AS ( SELECT - EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) AS release_year, - SUM(_s0.n_rows) AS sum_n_rows - FROM _s0 AS _s0 + COUNT(*) AS n_rows_1, + ANY_VALUE(_s1.pr_release) AS pr_release + FROM main.devices AS devices JOIN _s1 AS _s1 - ON _s0.de_product_id = _s1.pr_id + ON _s1.pr_id = devices.de_product_id + GROUP BY + devices.de_product_id +), _s6 AS ( + SELECT + 
EXTRACT(YEAR FROM CAST(pr_release AS DATETIME)) AS release_year, + SUM(n_rows_1) AS sum_n_rows + FROM _t1 GROUP BY - EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) + EXTRACT(YEAR FROM CAST(pr_release AS DATETIME)) ), _s7 AS ( SELECT COUNT(*) AS n_rows, diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql index df94defe5..3d44bc799 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql @@ -1,24 +1,24 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows, - de_product_id - FROM main.devices - GROUP BY - de_product_id -), _s1 AS ( +WITH _s1 AS ( SELECT pr_id, pr_release FROM main.products -), _s6 AS ( +), _t1 AS ( SELECT - CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) AS release_year, - SUM(_s0.n_rows) AS sum_n_rows - FROM _s0 AS _s0 + COUNT(*) AS n_rows_1, + MAX(_s1.pr_release) AS pr_release + FROM main.devices AS devices JOIN _s1 AS _s1 - ON _s0.de_product_id = _s1.pr_id + ON _s1.pr_id = devices.de_product_id + GROUP BY + devices.de_product_id +), _s6 AS ( + SELECT + CAST(STRFTIME('%Y', pr_release) AS INTEGER) AS release_year, + SUM(n_rows_1) AS sum_n_rows + FROM _t1 GROUP BY - CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) + CAST(STRFTIME('%Y', pr_release) AS INTEGER) ), _s7 AS ( SELECT COUNT(*) AS n_rows, From 7bf3268f0151ce51f87e34cc1ead8bb92e28a512 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 20 Aug 2025 20:11:23 -0400 Subject: [PATCH 086/143] WIP fixing column handling triple_partition + other bugs --- pydough/conversion/join_agg_transpose.py | 55 ++++++++++++--- pydough/conversion/relational_converter.py | 2 + .../aggregation_analytics_2.txt | 6 +- .../aggregation_analytics_3.txt | 6 +- tests/test_plan_refsols/correl_30.txt | 26 +++---- .../multi_partition_access_2.txt | 28 ++++---- .../multi_partition_access_4.txt | 8 +-- 
.../multi_partition_access_6.txt | 52 +++++++------- tests/test_sql_refsols/correl_30_sqlite.sql | 40 +++++------ .../defog_broker_adv5_ansi.sql | 70 ++++++++++--------- .../defog_broker_adv5_sqlite.sql | 56 +++++++-------- 11 files changed, 192 insertions(+), 157 deletions(-) diff --git a/pydough/conversion/join_agg_transpose.py b/pydough/conversion/join_agg_transpose.py index ba54206a9..d16fec832 100644 --- a/pydough/conversion/join_agg_transpose.py +++ b/pydough/conversion/join_agg_transpose.py @@ -3,6 +3,8 @@ __all__ = ["pull_joins_after_aggregates"] +from collections.abc import Iterable + import pydough.pydough_operators as pydop from pydough.relational import ( Aggregate, @@ -39,6 +41,21 @@ def visit_join(self, node: Join) -> RelationalNode: ) return super().visit_join(node) + def generate_name(self, base: str, used_names: Iterable[str]) -> str: + """ + Generates a new name for a column based on the base name and the existing + columns in the join. This is used to ensure that the new column names are + unique and do not conflict with existing names. 
+ """ + if base not in used_names: + return base + i = 0 + while True: + name = f"{base}_{i}" + if name not in used_names: + return name + i += 1 + def join_aggregate_transpose( self, join: Join, aggregate: Aggregate ) -> RelationalNode: @@ -84,6 +101,13 @@ def join_aggregate_transpose( ): return join + reverse_join_columns: dict[str, RelationalExpression] = {} + for join_col_name, join_col_expr in join.columns.items(): + assert isinstance(join_col_expr, ColumnReference) + reverse_join_columns[join_col_expr.name] = ColumnReference( + join_col_name, join_col_expr.data_type + ) + new_join_columns: dict[str, RelationalExpression] = {} new_key_columns: dict[str, RelationalExpression] = {} new_aggregate_columns: dict[str, CallExpression] = {} @@ -113,22 +137,31 @@ def join_aggregate_transpose( new_join_columns[key_name] = add_input_name( key_expr, join.default_input_aliases[0] ) - if key_name in used_column_names: - assert False - new_key_columns[key_name] = ColumnReference(key_name, col_expr.data_type) - used_column_names.add(key_name) + agg_key_name: str = self.generate_name(key_name, used_column_names) + new_key_columns[agg_key_name] = ColumnReference( + key_name, col_expr.data_type + ) + used_column_names.add(agg_key_name) for agg_name, agg_expr in aggregate.aggregations.items(): + new_inputs: list[RelationalExpression] = [] for input_expr in agg_expr.inputs: - if not isinstance(input_expr, ColumnReference): - assert False - if input_expr.name in new_join_columns: - assert False - new_join_columns[input_expr.name] = add_input_name( + join_name: str + if isinstance(input_expr, ColumnReference): + join_name = self.generate_name(input_expr.name, new_join_columns) + else: + join_name = self.generate_name("expr", new_join_columns) + new_join_columns[join_name] = add_input_name( input_expr, join.default_input_aliases[0] ) - if agg_name in used_column_names: - assert False + new_inputs.append(ColumnReference(join_name, input_expr.data_type)) + agg_name = 
self.generate_name(agg_name, used_column_names) + if new_inputs != agg_expr.inputs: + agg_expr = CallExpression( + agg_expr.op, + agg_expr.data_type, + new_inputs, + ) new_aggregate_columns[agg_name] = agg_expr used_column_names.add(agg_name) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 692cbf948..924352e52 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1496,6 +1496,8 @@ def optimize_relational_tree( simplify_expressions(root, additional_shuttles) root = confirm_root(push_filters(root)) root = confirm_root(pull_joins_after_aggregates(root)) + print() + print(root.to_tree_string()) root = ColumnPruner().prune_unused_columns(root) # Step 9: re-run projection merging, without pushing into joins. This diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 1d8f376f1..ebea7f70b 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + AGGREGATE(keys={'ps_partkey': anything_ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'p_name': ANYTHING(p_name), 'sum_revenue': SUM(expr)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_ps_partkey': t0.ps_partkey, 'expr': 
t0.l_extendedprice * 1:numeric - t0.l_discount * 1:numeric - t0.l_tax - t0.l_quantity * t0.ps_supplycost, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) @@ -11,4 +11,4 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_ SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index df3d64e66..0b44cb5a4 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', 
ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + AGGREGATE(keys={'ps_partkey': anything_ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'p_name': ANYTHING(p_name), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(expr)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_ps_partkey': t0.ps_partkey, 'expr': t0.l_extendedprice * 1:numeric - t0.l_discount * 1:numeric - t0.l_tax - t0.l_quantity * t0.ps_supplycost, 'l_quantity': t0.l_quantity, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) @@ -11,4 +11,4 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_reve 
SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 1ce81c590..b8b18388e 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) +ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', anything_n_rows)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) + AGGREGATE(keys={'anything_n_nationkey': anything_n_nationkey, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_rows': ANYTHING(n_rows), 'anything_region_name': 
ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) + JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_nationkey': t0.n_nationkey, 'n_name': t0.n_name, 'n_rows': t1.n_rows, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -10,13 +10,13 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': 
n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 4d9bd41a7..ea9c3f303 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ 
b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -3,26 +3,26 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares_1, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': ANYTHING(cust_avg_shares)}) + AGGREGATE(keys={'sbTxCustId_0': sbTxCustId_0}, aggregations={'cust_avg_shares': ANYTHING(cust_avg_shares), 'cust_tick_avg_shares': ANYTHING(cust_tick_avg_shares), 'sbTxCustId': ANYTHING(sbTxCustId), 'sbTxTickerId': ANYTHING(sbTxTickerId)}) 
+ JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares_1, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': AVG(sbTxShares)}) JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId_0': sbTxTickerId_0}, aggregations={'customer_id_9': ANYTHING(customer_id_9), 'sbTxTickerId': ANYTHING(sbTxTickerId), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': 
sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares_1': ANYTHING(cust_tick_avg_shares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares_1': ANYTHING(cust_tick_avg_shares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) + 
SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'sbTxShares': ANYTHING(sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'cus_tick_typ_avg_shares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index 9b5f5003a..3e4a0f4a7 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) JOIN(condition=t1.sbTxShares < t0.cust_max_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.cust_ticker_max_shares, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxId': t1.sbTxId}) - 
JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_max_shares': MAX(sbTxShares)}) + AGGREGATE(keys={'sbTxCustId_0': sbTxCustId_0}, aggregations={'cust_max_shares': MAX(sbTxShares), 'cust_ticker_max_shares': ANYTHING(cust_ticker_max_shares), 'sbTxCustId': ANYTHING(sbTxCustId), 'sbTxTickerId': ANYTHING(sbTxTickerId)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t0.sbTxShares, 'sbTxTickerId': t1.sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_ticker_max_shares': MAX(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_ticker_max_shares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 3da2dfe6f..7bf3bb930 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -4,43 +4,43 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=sum_n_cust_type_trans > 1:numeric, 
columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxType_0': sbTxType_0}, aggregations={'sbTxTickerId': ANYTHING(sbTxTickerId), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t0.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': 
sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId_0': sbTxTickerId_0}, aggregations={'sbTxTickerId': ANYTHING(sbTxTickerId), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxType_0': sbTxType_0}, aggregations={'sbTxTickerId': ANYTHING(sbTxTickerId), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t0.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, 
columns={'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId_0': sbTxTickerId_0}, aggregations={'sbTxTickerId': ANYTHING(sbTxTickerId), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, 
columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'n_ticker_type_trans': ANYTHING(n_ticker_type_trans), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) diff --git a/tests/test_sql_refsols/correl_30_sqlite.sql b/tests/test_sql_refsols/correl_30_sqlite.sql index 1463bac6f..eaf34eca7 100644 --- a/tests/test_sql_refsols/correl_30_sqlite.sql +++ 
b/tests/test_sql_refsols/correl_30_sqlite.sql @@ -17,21 +17,6 @@ WITH _t2 AS ( FROM tpch.region WHERE NOT r_name IN ('MIDDLE EAST', 'AFRICA', 'ASIA') -), _s12 AS ( - SELECT - MAX(nation.n_name) AS anything_n_name, - MAX(LOWER(_t3.r_name)) AS anything_region_name, - COUNT(*) AS n_rows, - nation.n_nationkey - FROM tpch.nation AS nation - JOIN _s1 AS _s1 - ON _s1.c_nationkey = nation.n_nationkey - JOIN _t3 AS _t3 - ON _t3.r_regionkey = nation.n_regionkey - JOIN _t2 AS _s5 - ON _s1.avg_cust_acctbal < _s5.c_acctbal AND _s5.c_nationkey = nation.n_nationkey - GROUP BY - nation.n_nationkey ), _t5 AS ( SELECT s_acctbal, @@ -59,13 +44,22 @@ WITH _t2 AS ( nation.n_nationkey ) SELECT - _s12.anything_region_name AS region_name, - _s12.anything_n_name AS nation_name, - _s12.n_rows AS n_above_avg_customers, - _s13.n_rows AS n_above_avg_suppliers -FROM _s12 AS _s12 + MAX(LOWER(_t3.r_name)) AS region_name, + MAX(nation.n_name) AS nation_name, + COUNT(*) AS n_above_avg_customers, + MAX(_s13.n_rows) AS n_above_avg_suppliers +FROM tpch.nation AS nation +JOIN _s1 AS _s1 + ON _s1.c_nationkey = nation.n_nationkey +JOIN _t3 AS _t3 + ON _t3.r_regionkey = nation.n_regionkey +JOIN _t2 AS _s5 + ON _s1.avg_cust_acctbal < _s5.c_acctbal AND _s5.c_nationkey = nation.n_nationkey JOIN _s13 AS _s13 - ON _s12.n_nationkey = _s13.n_nationkey + ON _s13.n_nationkey = anything_n_nationkey +GROUP BY + nation.n_nationkey, + n_nationkey ORDER BY - _s12.anything_region_name, - _s12.anything_n_name + MAX(LOWER(_t3.r_name)), + MAX(nation.n_name) diff --git a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql index 48ac2f401..b05541fdc 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql @@ -1,49 +1,55 @@ -WITH _s0 AS ( +WITH _t1 AS ( SELECT - COUNT(sbdpclose) AS count_sbdpclose, - MAX(sbdphigh) AS max_high, - MIN(sbdplow) AS min_low, + COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, + 
MAX(sbdailyprice.sbdphigh) AS max_high, + MIN(sbdailyprice.sbdplow) AS min_low, + ANY_VALUE(sbticker.sbtickersymbol) AS sbtickersymbol, + SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose, CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(sbdpdate AS DATETIME)), + EXTRACT(YEAR FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))), ( - 2 * -1 - )) + WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), 1, 2) + ELSE SUBSTRING( + CONCAT('00', EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))), + ( + 2 * -1 + ) + ) END - ) AS month, - SUM(sbdpclose) AS sum_sbdpclose, - sbdptickerid - FROM main.sbdailyprice + ) AS month + FROM main.sbdailyprice AS sbdailyprice + JOIN main.sbticker AS sbticker + ON sbdailyprice.sbdptickerid = sbticker.sbtickerid GROUP BY CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(sbdpdate AS DATETIME)), + EXTRACT(YEAR FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))), ( - 2 * -1 - )) + WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), 1, 2) + ELSE SUBSTRING( + CONCAT('00', EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))), + ( + 2 * -1 + ) + ) END ), - sbdptickerid + sbdailyprice.sbdptickerid ), _t0 AS ( SELECT - MAX(_s0.max_high) AS max_high, - MIN(_s0.min_low) AS min_low, - SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, - SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose, - _s0.month, - sbticker.sbtickersymbol - FROM _s0 AS 
_s0 - JOIN main.sbticker AS sbticker - ON _s0.sbdptickerid = sbticker.sbtickerid + MAX(max_high) AS max_high, + MIN(min_low) AS min_low, + SUM(count_sbdpclose) AS sum_count_sbdpclose, + SUM(sum_sbdpclose) AS sum_sum_sbdpclose, + month, + sbtickersymbol + FROM _t1 GROUP BY - _s0.month, - sbticker.sbtickersymbol + month, + sbtickersymbol ) SELECT sbtickersymbol AS symbol, diff --git a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql index 8a07d126c..9acd0eb0c 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql @@ -1,49 +1,49 @@ -WITH _s0 AS ( +WITH _t1 AS ( SELECT - COUNT(sbdpclose) AS count_sbdpclose, - MAX(sbdphigh) AS max_high, - MIN(sbdplow) AS min_low, + COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, + MAX(sbdailyprice.sbdphigh) AS max_high, + MIN(sbdailyprice.sbdplow) AS min_low, + MAX(sbticker.sbtickersymbol) AS sbtickersymbol, + SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose, CONCAT_WS( '-', - CAST(STRFTIME('%Y', sbdpdate) AS INTEGER), + CAST(STRFTIME('%Y', sbdailyprice.sbdpdate) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', sbdpdate) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', sbdpdate) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdpdate) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), ( 2 * -1 )) END - ) AS month, - SUM(sbdpclose) AS sum_sbdpclose, - sbdptickerid - FROM main.sbdailyprice + ) AS month + FROM main.sbdailyprice AS sbdailyprice + JOIN main.sbticker AS sbticker + ON sbdailyprice.sbdptickerid = sbticker.sbtickerid GROUP BY CONCAT_WS( '-', - CAST(STRFTIME('%Y', sbdpdate) AS INTEGER), + CAST(STRFTIME('%Y', sbdailyprice.sbdpdate) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', 
sbdpdate) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', sbdpdate) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdpdate) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), ( 2 * -1 )) END ), - sbdptickerid + sbdailyprice.sbdptickerid ), _t0 AS ( SELECT - MAX(_s0.max_high) AS max_high, - MIN(_s0.min_low) AS min_low, - SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, - SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose, - _s0.month, - sbticker.sbtickersymbol - FROM _s0 AS _s0 - JOIN main.sbticker AS sbticker - ON _s0.sbdptickerid = sbticker.sbtickerid + MAX(max_high) AS max_high, + MIN(min_low) AS min_low, + SUM(count_sbdpclose) AS sum_count_sbdpclose, + SUM(sum_sbdpclose) AS sum_sum_sbdpclose, + month, + sbtickersymbol + FROM _t1 GROUP BY - _s0.month, - sbticker.sbtickersymbol + month, + sbtickersymbol ) SELECT sbtickersymbol AS symbol, From a34ec879c6ae9d1dcfc6ff449f870e5f45ff975f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 25 Aug 2025 14:05:11 -0400 Subject: [PATCH 087/143] Adding reverse cardinality support --- pydough/conversion/hybrid_connection.py | 10 +++- pydough/conversion/hybrid_decorrelater.py | 8 +++ pydough/conversion/hybrid_tree.py | 53 ++++++++++++++++++- pydough/conversion/relational_converter.py | 30 +++++++++++ pydough/metadata/parse.py | 2 + .../reversible_property_metadata.py | 5 ++ pydough/relational/relational_nodes/join.py | 26 ++++++++- .../access_partition_child_after_filter.txt | 2 +- .../access_partition_child_backref_calc.txt | 2 +- ..._partition_child_filter_backref_filter.txt | 2 +- tests/test_plan_refsols/agg_max_ranking.txt | 2 +- .../agg_orders_by_year_month_just_europe.txt | 8 +-- .../agg_orders_by_year_month_vs_europe.txt | 8 +-- .../agg_parts_by_type_backref_global.txt | 2 +- 
tests/test_plan_refsols/aggregate_anti.txt | 4 +- .../aggregate_mixed_levels_simple.txt | 4 +- .../aggregate_on_function_call.txt | 2 +- tests/test_plan_refsols/aggregate_semi.txt | 4 +- .../aggregate_then_backref.txt | 4 +- .../aggregation_analytics_1.txt | 12 ++--- .../aggregation_analytics_2.txt | 8 +-- .../aggregation_analytics_3.txt | 8 +-- tests/test_plan_refsols/anti_aggregate.txt | 4 +- .../anti_aggregate_alternate.txt | 4 +- tests/test_plan_refsols/anti_singular.txt | 2 +- tests/test_plan_refsols/asian_nations.txt | 2 +- .../test_plan_refsols/avg_acctbal_wo_debt.txt | 4 +- .../avg_order_diff_per_customer.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_1.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_4.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_5.txt | 4 +- tests/test_plan_refsols/common_prefix_a.txt | 4 +- tests/test_plan_refsols/common_prefix_aa.txt | 4 +- tests/test_plan_refsols/common_prefix_ab.txt | 4 +- tests/test_plan_refsols/common_prefix_ac.txt | 2 +- tests/test_plan_refsols/common_prefix_ad.txt | 8 +-- tests/test_plan_refsols/common_prefix_ae.txt | 8 +-- tests/test_plan_refsols/common_prefix_af.txt | 8 +-- tests/test_plan_refsols/common_prefix_ag.txt | 28 +++++----- tests/test_plan_refsols/common_prefix_ah.txt | 22 ++++---- tests/test_plan_refsols/common_prefix_ai.txt | 20 +++---- tests/test_plan_refsols/common_prefix_aj.txt | 28 +++++----- tests/test_plan_refsols/common_prefix_ak.txt | 26 ++++----- tests/test_plan_refsols/common_prefix_al.txt | 14 ++--- tests/test_plan_refsols/common_prefix_am.txt | 10 ++-- tests/test_plan_refsols/common_prefix_an.txt | 12 ++--- tests/test_plan_refsols/common_prefix_ao.txt | 12 ++--- tests/test_plan_refsols/common_prefix_ap.txt | 6 +-- tests/test_plan_refsols/common_prefix_aq.txt | 8 +-- tests/test_plan_refsols/common_prefix_b.txt | 6 +-- tests/test_plan_refsols/common_prefix_c.txt | 10 ++-- 
tests/test_plan_refsols/common_prefix_d.txt | 12 ++--- tests/test_plan_refsols/common_prefix_e.txt | 4 +- tests/test_plan_refsols/common_prefix_f.txt | 6 +-- tests/test_plan_refsols/common_prefix_g.txt | 6 +-- tests/test_plan_refsols/common_prefix_h.txt | 10 ++-- tests/test_plan_refsols/common_prefix_i.txt | 4 +- tests/test_plan_refsols/common_prefix_j.txt | 4 +- tests/test_plan_refsols/common_prefix_k.txt | 4 +- tests/test_plan_refsols/common_prefix_l.txt | 10 ++-- tests/test_plan_refsols/common_prefix_m.txt | 10 ++-- tests/test_plan_refsols/common_prefix_n.txt | 14 ++--- tests/test_plan_refsols/common_prefix_o.txt | 14 ++--- tests/test_plan_refsols/common_prefix_p.txt | 6 +-- tests/test_plan_refsols/common_prefix_q.txt | 8 +-- tests/test_plan_refsols/common_prefix_r.txt | 8 +-- tests/test_plan_refsols/common_prefix_s.txt | 6 +-- tests/test_plan_refsols/common_prefix_t.txt | 6 +-- tests/test_plan_refsols/common_prefix_u.txt | 6 +-- tests/test_plan_refsols/common_prefix_v.txt | 4 +- tests/test_plan_refsols/common_prefix_w.txt | 4 +- tests/test_plan_refsols/common_prefix_x.txt | 4 +- tests/test_plan_refsols/common_prefix_y.txt | 4 +- tests/test_plan_refsols/common_prefix_z.txt | 4 +- tests/test_plan_refsols/correl_1.txt | 2 +- tests/test_plan_refsols/correl_10.txt | 2 +- tests/test_plan_refsols/correl_11.txt | 2 +- tests/test_plan_refsols/correl_12.txt | 4 +- tests/test_plan_refsols/correl_13.txt | 4 +- tests/test_plan_refsols/correl_14.txt | 8 +-- tests/test_plan_refsols/correl_15.txt | 10 ++-- tests/test_plan_refsols/correl_16.txt | 6 +-- tests/test_plan_refsols/correl_17.txt | 2 +- tests/test_plan_refsols/correl_18.txt | 2 +- tests/test_plan_refsols/correl_19.txt | 4 +- tests/test_plan_refsols/correl_2.txt | 4 +- tests/test_plan_refsols/correl_20.txt | 10 ++-- tests/test_plan_refsols/correl_21.txt | 2 +- tests/test_plan_refsols/correl_22.txt | 2 +- tests/test_plan_refsols/correl_23.txt | 2 +- tests/test_plan_refsols/correl_24.txt | 2 +- 
tests/test_plan_refsols/correl_25.txt | 14 ++--- tests/test_plan_refsols/correl_26.txt | 12 ++--- tests/test_plan_refsols/correl_27.txt | 12 ++--- tests/test_plan_refsols/correl_28.txt | 12 ++--- tests/test_plan_refsols/correl_29.txt | 12 ++--- tests/test_plan_refsols/correl_3.txt | 6 +-- tests/test_plan_refsols/correl_30.txt | 14 ++--- tests/test_plan_refsols/correl_31.txt | 10 ++-- tests/test_plan_refsols/correl_32.txt | 6 +-- tests/test_plan_refsols/correl_33.txt | 2 +- tests/test_plan_refsols/correl_34.txt | 14 ++--- tests/test_plan_refsols/correl_35.txt | 14 ++--- tests/test_plan_refsols/correl_36.txt | 22 ++++---- tests/test_plan_refsols/correl_4.txt | 6 +-- tests/test_plan_refsols/correl_5.txt | 6 +-- tests/test_plan_refsols/correl_6.txt | 2 +- tests/test_plan_refsols/correl_7.txt | 2 +- tests/test_plan_refsols/correl_8.txt | 2 +- tests/test_plan_refsols/correl_9.txt | 2 +- ...count_at_most_100_suppliers_per_nation.txt | 2 +- .../count_cust_supplier_nation_combos.txt | 12 ++--- ...multiple_subcollections_alongside_aggs.txt | 4 +- .../count_single_subcollection.txt | 2 +- .../cumulative_stock_analysis.txt | 2 +- .../customer_largest_order_deltas.txt | 4 +- .../customer_most_recent_orders.txt | 2 +- .../customers_sum_line_price.txt | 4 +- .../test_plan_refsols/deep_best_analysis.txt | 20 +++---- tests/test_plan_refsols/double_cross.txt | 6 +-- tests/test_plan_refsols/dumb_aggregation.txt | 2 +- .../epoch_culture_events_info.txt | 10 ++-- .../epoch_event_gap_per_era.txt | 2 +- .../epoch_events_per_season.txt | 2 +- .../epoch_first_event_per_era.txt | 2 +- .../epoch_intra_season_searches.txt | 18 +++---- ...och_most_popular_search_engine_per_tod.txt | 2 +- .../epoch_most_popular_topic_per_region.txt | 4 +- .../epoch_num_predawn_cold_war.txt | 6 +-- ...ping_event_search_other_users_per_user.txt | 8 +-- ...ch_overlapping_event_searches_per_user.txt | 8 +-- .../epoch_pct_searches_per_tod.txt | 2 +- .../epoch_search_results_by_tod.txt | 2 +- 
.../epoch_summer_events_per_type.txt | 2 +- .../epoch_unique_users_per_engine.txt | 4 +- .../epoch_users_most_cold_war_searches.txt | 6 +-- .../first_order_per_customer.txt | 2 +- tests/test_plan_refsols/function_sampler.txt | 4 +- .../global_aggfunc_backref.txt | 2 +- .../global_aggfuncs_multiple_children.txt | 4 +- tests/test_plan_refsols/hour_minute_day.txt | 2 +- .../join_asia_region_nations.txt | 2 +- tests/test_plan_refsols/join_order_by.txt | 2 +- .../join_order_by_back_reference.txt | 2 +- .../join_order_by_pruned_back_reference.txt | 2 +- .../test_plan_refsols/join_region_nations.txt | 2 +- .../join_region_nations_customers.txt | 4 +- tests/test_plan_refsols/join_topk.txt | 2 +- .../lineitem_regional_shipments.txt | 16 +++--- .../lineitem_regional_shipments2.txt | 16 +++--- .../lineitem_regional_shipments3.txt | 16 +++--- ...lineitems_access_cust_supplier_nations.txt | 12 ++--- .../lines_german_supplier_economy_part.txt | 10 ++-- .../lines_shipping_vs_customer_region.txt | 16 +++--- .../month_year_sliding_windows.txt | 2 +- .../mostly_positive_accounts_per_nation1.txt | 4 +- .../mostly_positive_accounts_per_nation2.txt | 4 +- .../mostly_positive_accounts_per_nation3.txt | 4 +- .../multi_partition_access_2.txt | 20 +++---- .../multi_partition_access_3.txt | 14 ++--- .../multi_partition_access_4.txt | 4 +- .../multi_partition_access_5.txt | 8 +-- .../multi_partition_access_6.txt | 28 +++++----- .../test_plan_refsols/multiple_has_hasnot.txt | 18 +++---- ...ple_simple_aggregations_multiple_calcs.txt | 4 +- ...ltiple_simple_aggregations_single_calc.txt | 4 +- .../nation_acctbal_breakdown.txt | 4 +- tests/test_plan_refsols/nation_best_order.txt | 6 +-- .../nation_name_contains_region_name.txt | 2 +- .../nations_access_region.txt | 2 +- .../nations_order_by_num_suppliers.txt | 2 +- .../nations_region_order_by_name.txt | 2 +- .../nations_sum_line_price.txt | 6 +-- .../num_positive_accounts_per_nation.txt | 4 +- .../odate_and_rdate_avggap.txt | 2 +- 
.../order_by_before_join.txt | 2 +- .../ordered_asian_nations.txt | 2 +- .../orders_sum_line_price.txt | 2 +- .../orders_sum_vs_count_line_price.txt | 2 +- .../orders_versus_first_orders.txt | 6 +-- tests/test_plan_refsols/pagerank_a1.txt | 6 +-- tests/test_plan_refsols/pagerank_a2.txt | 10 ++-- tests/test_plan_refsols/pagerank_a6.txt | 26 ++++----- tests/test_plan_refsols/pagerank_b3.txt | 14 ++--- tests/test_plan_refsols/pagerank_c4.txt | 18 +++---- tests/test_plan_refsols/pagerank_d5.txt | 22 ++++---- tests/test_plan_refsols/pagerank_h8.txt | 34 ++++++------ tests/test_plan_refsols/part_cross_part_a.txt | 10 ++-- tests/test_plan_refsols/part_cross_part_b.txt | 10 ++-- tests/test_plan_refsols/part_cross_part_c.txt | 10 ++-- tests/test_plan_refsols/part_reduced_size.txt | 2 +- .../parts_quantity_increase_95_96.txt | 8 +-- .../percentile_customers_per_region.txt | 4 +- .../quantile_function_test_1.txt | 2 +- .../quantile_function_test_2.txt | 6 +-- .../quantile_function_test_3.txt | 6 +-- .../quantile_function_test_4.txt | 6 +-- .../rank_customers_per_nation.txt | 2 +- .../rank_customers_per_region.txt | 4 +- .../rank_nations_by_region.txt | 2 +- .../rank_nations_per_region_by_customers.txt | 4 +- ...rank_parts_per_supplier_region_by_size.txt | 8 +-- .../test_plan_refsols/rank_with_filters_c.txt | 2 +- .../region_acctbal_breakdown.txt | 4 +- .../region_nation_window_aggs.txt | 2 +- .../region_nations_backref.txt | 2 +- .../region_orders_from_nations_richest.txt | 6 +-- .../regional_first_order_best_line_part.txt | 10 ++-- .../regional_suppliers_percentile.txt | 6 +-- .../regions_sum_line_price.txt | 8 +-- tests/test_plan_refsols/replace_order_by.txt | 2 +- .../richest_customer_per_region.txt | 4 +- tests/test_plan_refsols/semi_aggregate.txt | 4 +- tests/test_plan_refsols/semi_singular.txt | 2 +- tests/test_plan_refsols/simple_anti_1.txt | 2 +- tests/test_plan_refsols/simple_anti_2.txt | 4 +- tests/test_plan_refsols/simple_cross_1.txt | 2 +- 
tests/test_plan_refsols/simple_cross_10.txt | 6 +-- tests/test_plan_refsols/simple_cross_11.txt | 2 +- tests/test_plan_refsols/simple_cross_12.txt | 2 +- tests/test_plan_refsols/simple_cross_2.txt | 2 +- tests/test_plan_refsols/simple_cross_3.txt | 14 ++--- tests/test_plan_refsols/simple_cross_4.txt | 4 +- tests/test_plan_refsols/simple_cross_5.txt | 8 +-- tests/test_plan_refsols/simple_cross_6.txt | 2 +- tests/test_plan_refsols/simple_cross_7.txt | 4 +- tests/test_plan_refsols/simple_cross_8.txt | 16 +++--- tests/test_plan_refsols/simple_cross_9.txt | 6 +-- tests/test_plan_refsols/simple_semi_1.txt | 2 +- tests/test_plan_refsols/simple_semi_2.txt | 4 +- tests/test_plan_refsols/simple_var_std.txt | 2 +- tests/test_plan_refsols/singular1.txt | 2 +- tests/test_plan_refsols/singular2.txt | 4 +- tests/test_plan_refsols/singular3.txt | 2 +- tests/test_plan_refsols/singular4.txt | 2 +- tests/test_plan_refsols/singular5.txt | 4 +- tests/test_plan_refsols/singular6.txt | 8 +-- tests/test_plan_refsols/singular7.txt | 6 +-- tests/test_plan_refsols/singular_anti.txt | 2 +- tests/test_plan_refsols/singular_semi.txt | 2 +- tests/test_plan_refsols/smoke_d.txt | 4 +- .../sqlite_udf_combine_strings.txt | 4 +- .../sqlite_udf_count_epsilon.txt | 4 +- .../sqlite_udf_covar_pop.txt | 6 +-- tests/test_plan_refsols/sqlite_udf_nested.txt | 2 +- tests/test_plan_refsols/sqlite_udf_nval.txt | 2 +- .../sqlite_udf_percent_positive.txt | 8 +-- .../test_plan_refsols/supplier_best_part.txt | 8 +-- .../supplier_pct_national_qty.txt | 8 +-- .../test_plan_refsols/suppliers_bal_diffs.txt | 4 +- ...ograph_battery_failure_rates_anomalies.txt | 8 +-- .../technograph_country_cartesian_oddball.txt | 2 +- ...chnograph_country_combination_analysis.txt | 10 ++-- ...nograph_country_incident_rate_analysis.txt | 14 ++--- ...aph_error_percentages_sun_set_by_error.txt | 6 +-- ..._error_rate_sun_set_by_factory_country.txt | 6 +-- .../technograph_global_incident_rate.txt | 2 +- 
.../technograph_hot_purchase_window.txt | 4 +- ...hnograph_incident_rate_by_release_year.txt | 8 +-- .../technograph_incident_rate_per_brand.txt | 4 +- .../technograph_monthly_incident_rate.txt | 16 +++--- .../technograph_most_unreliable_products.txt | 4 +- ...umulative_incident_rate_goldcopperstar.txt | 16 +++--- ..._year_cumulative_incident_rate_overall.txt | 8 +-- ...top_5_nations_balance_by_num_suppliers.txt | 2 +- .../top_5_nations_by_num_supplierss.txt | 2 +- .../top_customers_by_orders.txt | 2 +- tests/test_plan_refsols/tpch_q10.txt | 6 +-- tests/test_plan_refsols/tpch_q11.txt | 10 ++-- tests/test_plan_refsols/tpch_q12.txt | 2 +- tests/test_plan_refsols/tpch_q13.txt | 2 +- tests/test_plan_refsols/tpch_q14.txt | 2 +- tests/test_plan_refsols/tpch_q15.txt | 6 +-- tests/test_plan_refsols/tpch_q16.txt | 4 +- tests/test_plan_refsols/tpch_q17.txt | 2 +- tests/test_plan_refsols/tpch_q18.txt | 4 +- tests/test_plan_refsols/tpch_q19.txt | 2 +- tests/test_plan_refsols/tpch_q2.txt | 8 +-- tests/test_plan_refsols/tpch_q20.txt | 8 +-- tests/test_plan_refsols/tpch_q21.txt | 14 ++--- tests/test_plan_refsols/tpch_q22.txt | 4 +- tests/test_plan_refsols/tpch_q3.txt | 4 +- tests/test_plan_refsols/tpch_q4.txt | 2 +- tests/test_plan_refsols/tpch_q5.txt | 12 ++--- tests/test_plan_refsols/tpch_q7.txt | 10 ++-- tests/test_plan_refsols/tpch_q8.txt | 14 ++--- tests/test_plan_refsols/tpch_q9.txt | 10 ++-- tests/test_plan_refsols/triple_partition.txt | 16 +++--- .../various_aggfuncs_simple.txt | 2 +- .../window_filter_order_1.txt | 4 +- .../window_filter_order_10.txt | 2 +- .../window_filter_order_2.txt | 4 +- .../window_filter_order_3.txt | 4 +- .../window_filter_order_4.txt | 4 +- .../window_filter_order_5.txt | 2 +- .../window_filter_order_6.txt | 2 +- .../window_filter_order_7.txt | 2 +- .../window_filter_order_8.txt | 4 +- .../window_filter_order_9.txt | 4 +- .../window_sliding_frame_relsize.txt | 2 +- .../window_sliding_frame_relsum.txt | 2 +- .../year_month_nation_orders.txt | 6 
+-- 304 files changed, 1074 insertions(+), 948 deletions(-) diff --git a/pydough/conversion/hybrid_connection.py b/pydough/conversion/hybrid_connection.py index e9385c5ef..a39a147b9 100644 --- a/pydough/conversion/hybrid_connection.py +++ b/pydough/conversion/hybrid_connection.py @@ -10,7 +10,7 @@ from enum import Enum from typing import TYPE_CHECKING -from pydough.relational import JoinType +from pydough.relational import JoinCardinality, JoinType from .hybrid_expressions import ( HybridFunctionExpr, @@ -313,6 +313,8 @@ class HybridConnection: child can be defined at (exclusive). - `aggs`: a mapping of aggregation calls made onto expressions relative to the context of `subtree`. + - `reverse_cardinality`: the JoinCardinality of the connection from the + perspective of the child subtree back to the parent tree. """ parent: "HybridTree" @@ -349,6 +351,12 @@ class HybridConnection: expressions defined relative to the child subtree. """ + reverse_cardinality: JoinCardinality + """ + The JoinCardinality of the connection from the perspective of the child + subtree back to the parent tree. 
+ """ + always_exists: bool | None = None """ Whether the connection is guaranteed to have at least one matching diff --git a/pydough/conversion/hybrid_decorrelater.py b/pydough/conversion/hybrid_decorrelater.py index 628d37e4d..fe48c2379 100644 --- a/pydough/conversion/hybrid_decorrelater.py +++ b/pydough/conversion/hybrid_decorrelater.py @@ -9,6 +9,7 @@ import copy import pydough.pydough_operators as pydop +from pydough.relational import JoinCardinality from pydough.types import BooleanType from .hybrid_connection import ConnectionType, HybridConnection @@ -427,6 +428,13 @@ def decorrelate_child( ) if child.connection_type.is_aggregation or is_faux_agg: child.subtree.agg_keys = new_agg_keys + + # Mark the reverse cardinality as SINGULAR_ACCESS since each record of + # the de-correlated child can only match with one record of the + # original parent due to the join keys being based on the uniqueness + # keys of the original parent. + child.reverse_cardinality = JoinCardinality.SINGULAR_ACCESS + # If the child is such that we don't need to keep rows from the parent # without a match, replace the parent & its ancestors with a # HybridPullUp node (and replace any other deleted nodes with no-ops). diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index a9af8d1ef..9567d30f9 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -18,11 +18,13 @@ from pydough.metadata import ( SubcollectionRelationshipMetadata, ) +from pydough.metadata.properties import ReversiblePropertyMetadata from pydough.qdag import ( Literal, SubCollection, TableCollection, ) +from pydough.relational import JoinCardinality from pydough.types import BooleanType, NumericType from .hybrid_connection import ConnectionType, HybridConnection @@ -571,13 +573,27 @@ def add_child( # Return the index of the existing child. return idx + # Infer the cardinality of the join from the perspective of the new + # collection to the existing data. 
+ reverse_cardinality: JoinCardinality = child.infer_root_reverse_cardinality() + # Create and insert the new child connection. new_child_idx = len(self.children) connection: HybridConnection = HybridConnection( - self, child, connection_type, min_steps, max_steps, {} + self, + child, + connection_type, + min_steps, + max_steps, + {}, + reverse_cardinality, ) self._children.append(connection) + # Augment the reverse cardinality if the parent does not always exist. + if (not reverse_cardinality.filters) and (not self.always_exists()): + connection.reverse_cardinality = reverse_cardinality.add_filter() + # If an operation prevents the child's presence from directly # filtering the current level, update its connection type to be either # SINGULAR or AGGREGATION, then insert a similar COUNT(*)/PRESENT @@ -597,6 +613,41 @@ def add_child( # Return the index of the newly created child. return new_child_idx + def infer_root_reverse_cardinality(self) -> JoinCardinality: + """ + TODO + """ + if self.parent is None: + match self.pipeline[0]: + case HybridRoot(): + return JoinCardinality.PLURAL_ACCESS + case HybridCollectionAccess(): + cardinality: JoinCardinality = JoinCardinality.PLURAL_ACCESS + if isinstance(self.pipeline[0].collection, SubCollection): + metadata = self.pipeline[0].collection.subcollection_property + if ( + isinstance(metadata, ReversiblePropertyMetadata) + and metadata.reverse is not None + ): + if metadata.reverse.is_plural: + cardinality = JoinCardinality.PLURAL_ACCESS + else: + cardinality = JoinCardinality.SINGULAR_ACCESS + if not metadata.reverse.always_matches: + cardinality = cardinality.add_filter() + return JoinCardinality.PLURAL_ACCESS + return cardinality + case HybridPartition(): + return self.children[0].subtree.infer_root_reverse_cardinality() + case HybridPartitionChild(): + return self.pipeline[0].subtree.infer_root_reverse_cardinality() + case _: + raise NotImplementedError( + f"Invalid start of pipeline: 
{self.pipeline[0].__class__.__name__}" + ) + else: + return self.parent.infer_root_reverse_cardinality() + def add_successor(self, successor: "HybridTree") -> None: """ Marks two hybrid trees in a predecessor-successor relationship. diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index df8a5284a..a751bfba4 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -18,6 +18,7 @@ SimpleJoinMetadata, SimpleTableMetadata, ) +from pydough.metadata.properties import ReversiblePropertyMetadata from pydough.qdag import ( Calculate, CollectionAccess, @@ -428,6 +429,7 @@ def join_outputs( rhs_result: TranslationOutput, join_type: JoinType, join_cardinality: JoinCardinality, + reverse_join_cardinality: JoinCardinality, join_keys: list[tuple[HybridExpr, HybridExpr]] | None, join_cond: HybridExpr | None, child_idx: int | None, @@ -444,6 +446,8 @@ def join_outputs( onto `rhs_result`. `join_cardinality`: the cardinality of the join to be used to connect `lhs_result` onto `rhs_result`. + `reverse_join_cardinality`: the cardinality of the join from the + perspective of `rhs_result`. `join_keys`: a list of tuples in the form `(lhs_key, rhs_key)` that represent the equi-join keys used for the join from either side. This can be None if the `join_cond` is provided instead. 
@@ -488,6 +492,7 @@ def join_outputs( join_type, join_columns, join_cardinality, + reverse_join_cardinality, correl_name=lhs_result.correlated_name, ) input_aliases: list[str | None] = out_rel.default_input_aliases @@ -700,6 +705,7 @@ def handle_children( child_output, child.connection_type.join_type, cardinality, + child.reverse_cardinality, join_keys, child.subtree.general_join_condition, child_idx, @@ -714,6 +720,7 @@ def handle_children( child_output, child.connection_type.join_type, JoinCardinality.SINGULAR_FILTER, + child.reverse_cardinality, join_keys, child.subtree.general_join_condition, child_idx, @@ -839,6 +846,26 @@ def translate_sub_collection( else cardinality.add_filter() ) + # Infer the cardinality of the join from the perspective of the new + # collection to the existing data. + reverse_cardinality: JoinCardinality + if ( + isinstance( + collection_access.subcollection_property, ReversiblePropertyMetadata + ) + and collection_access.subcollection_property.reverse is not None + ): + if collection_access.subcollection_property.reverse.is_plural: + reverse_cardinality = JoinCardinality.PLURAL_ACCESS + else: + reverse_cardinality = JoinCardinality.SINGULAR_ACCESS + if not collection_access.subcollection_property.reverse.always_matches: + reverse_cardinality = reverse_cardinality.add_filter() + else: + reverse_cardinality = JoinCardinality.PLURAL_ACCESS + if (not reverse_cardinality.filters) and (not parent.always_exists()): + reverse_cardinality = reverse_cardinality.add_filter() + join_keys: list[tuple[HybridExpr, HybridExpr]] | None = None join_cond: HybridExpr | None = None match collection_access.subcollection_property: @@ -868,6 +895,7 @@ def translate_sub_collection( rhs_output, JoinType.INNER, cardinality, + reverse_cardinality, join_keys, join_cond, None, @@ -1099,6 +1127,7 @@ def translate_partition_child( child_output, JoinType.INNER, JoinCardinality.PLURAL_FILTER, + JoinCardinality.SINGULAR_ACCESS, join_keys, None, None, @@ -1259,6 +1288,7 
@@ def rel_translation( result, JoinType.INNER, JoinCardinality.PLURAL_ACCESS, + JoinCardinality.SINGULAR_ACCESS, join_keys, None, None, diff --git a/pydough/metadata/parse.py b/pydough/metadata/parse.py index 41dce9ec4..3c14da6ed 100644 --- a/pydough/metadata/parse.py +++ b/pydough/metadata/parse.py @@ -311,6 +311,8 @@ def create_reverse_relationship( extra_semantic_info, ) ) + original_property.reverse = reverse_property + reverse_property.reverse = original_property reverse_collection.add_property(reverse_property) diff --git a/pydough/metadata/properties/reversible_property_metadata.py b/pydough/metadata/properties/reversible_property_metadata.py index 42b5983a2..abce1a327 100644 --- a/pydough/metadata/properties/reversible_property_metadata.py +++ b/pydough/metadata/properties/reversible_property_metadata.py @@ -17,6 +17,11 @@ class ReversiblePropertyMetadata(SubcollectionRelationshipMetadata): reverse relationship. """ + reverse: SubcollectionRelationshipMetadata | None = None + """ + The reverse property that goes from the child back to the parent. 
+ """ + @abstractmethod def build_reverse_relationship( self, diff --git a/pydough/relational/relational_nodes/join.py b/pydough/relational/relational_nodes/join.py index fc96c841c..6c6a7a34d 100644 --- a/pydough/relational/relational_nodes/join.py +++ b/pydough/relational/relational_nodes/join.py @@ -160,6 +160,7 @@ def __init__( join_type: JoinType, columns: dict[str, RelationalExpression], cardinality: JoinCardinality = JoinCardinality.UNKNOWN_UNKNOWN, + reverse_cardinality: JoinCardinality = JoinCardinality.UNKNOWN_UNKNOWN, correl_name: str | None = None, ) -> None: super().__init__(columns) @@ -171,6 +172,7 @@ def __init__( self._condition: RelationalExpression = condition self._join_type: JoinType = join_type self._cardinality: JoinCardinality = cardinality + self._reverse_cardinality: JoinCardinality = reverse_cardinality self._correl_name: str | None = correl_name @property @@ -212,7 +214,7 @@ def join_type(self, join_type: JoinType) -> None: @property def cardinality(self) -> JoinCardinality: """ - The type of the joins. + The cardinality of the join, from the perspective of the first input. """ return self._cardinality @@ -223,6 +225,20 @@ def cardinality(self, cardinality: JoinCardinality) -> None: """ self._cardinality = cardinality + @property + def reverse_cardinality(self) -> JoinCardinality: + """ + The cardinality of the join, from the perspective of the second input. + """ + return self._reverse_cardinality + + @reverse_cardinality.setter + def reverse_cardinality(self, cardinality: JoinCardinality) -> None: + """ + The setter for the reverse join cardinality. 
+ """ + self._reverse_cardinality = cardinality + @property def inputs(self) -> list[RelationalNode]: return self._inputs @@ -261,7 +277,12 @@ def to_string(self, compact: bool = False) -> str: if self.cardinality == JoinCardinality.UNKNOWN_UNKNOWN else f", cardinality={self.cardinality.name}" ) - return f"JOIN(condition={self.condition.to_string(compact)}, type={self.join_type.name}{cardinality_suffix}, columns={self.make_column_string(self.columns, compact)}{correl_suffix})" + reverse_cardinality_suffix: str = ( + "" + if self.reverse_cardinality == JoinCardinality.UNKNOWN_UNKNOWN + else f", reverse_cardinality={self.reverse_cardinality.name}" + ) + return f"JOIN(condition={self.condition.to_string(compact)}, type={self.join_type.name}{cardinality_suffix}{reverse_cardinality_suffix}, columns={self.make_column_string(self.columns, compact)}{correl_suffix})" def accept(self, visitor: "RelationalVisitor") -> None: visitor.visit_join(self) @@ -280,5 +301,6 @@ def node_copy( self.join_type, columns, self.cardinality, + self.reverse_cardinality, self.correl_name, ) diff --git a/tests/test_plan_refsols/access_partition_child_after_filter.txt b/tests/test_plan_refsols/access_partition_child_after_filter.txt index 24d37657a..76c2d9652 100644 --- a/tests/test_plan_refsols/access_partition_child_after_filter.txt +++ b/tests/test_plan_refsols/access_partition_child_after_filter.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price', p_retailprice)], orderings=[]) - JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) + JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) FILTER(condition=avg_p_retailprice > 27.5:numeric, columns={'p_type': p_type}) AGGREGATE(keys={'p_type': 
p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/access_partition_child_backref_calc.txt b/tests/test_plan_refsols/access_partition_child_backref_calc.txt index 5b5f23b34..3296b8420 100644 --- a/tests/test_plan_refsols/access_partition_child_backref_calc.txt +++ b/tests/test_plan_refsols/access_partition_child_backref_calc.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price_versus_avg', p_retailprice - avg_price)], orderings=[]) - JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_price': t0.avg_price, 'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) + JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_price': t0.avg_price, 'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt b/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt index 799732da6..9527b7c1a 100644 --- a/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt +++ b/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price', p_retailprice)], orderings=[]) - JOIN(condition=t1.p_retailprice < t0.avg_price & t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) + 
JOIN(condition=t1.p_retailprice < t0.avg_price & t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/agg_max_ranking.txt b/tests/test_plan_refsols/agg_max_ranking.txt index 129c232f4..d73080147 100644 --- a/tests/test_plan_refsols/agg_max_ranking.txt +++ b/tests/test_plan_refsols/agg_max_ranking.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('highest_rank', highest_rank)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'highest_rank': t1.highest_rank, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'highest_rank': t1.highest_rank, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'highest_rank': MAX(cust_rank)}) PROJECT(columns={'c_nationkey': c_nationkey, 'cust_rank': RANKING(args=[], partition=[], order=[(c_acctbal):desc_first], allow_ties=True)}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt index 8690211bf..5a9021e4e 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt @@ -1,12 +1,12 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[]) - JOIN(condition=t0.month == t1.month & 
t0.year == t1.year, type=LEFT, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) + JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt 
b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt index 5325d91e9..1bc99c0d2 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt @@ -1,12 +1,12 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', n_rows), ('total_orders', DEFAULT_TO(agg_1, 0:numeric))], orderings=[]) - JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.n_rows, 'month': t0.month, 'n_rows': t0.n_rows, 'year': t0.year}) + JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'month': t0.month, 'n_rows': t0.n_rows, 'year': t0.year}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt b/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt index b3c214fec..3a3ff6bd6 100644 --- a/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt +++ b/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_type', p_type), ('percentage_of_parts', n_rows / total_num_parts), ('avg_price', avg_p_retailprice)], orderings=[]) - JOIN(condition=t1.avg_p_retailprice >= t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 'p_type': t1.p_type, 'total_num_parts': t0.total_num_parts}) + JOIN(condition=t1.avg_p_retailprice >= t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 'p_type': t1.p_type, 'total_num_parts': t0.total_num_parts}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice), 'total_num_parts': COUNT()}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/aggregate_anti.txt b/tests/test_plan_refsols/aggregate_anti.txt index 02458ffdc..d780bbcb5 100644 --- a/tests/test_plan_refsols/aggregate_anti.txt +++ b/tests/test_plan_refsols/aggregate_anti.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), 
('sum_price_of_10parts', 0:numeric)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt index 4efebacf2..81d20edcc 100644 --- a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt +++ b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt @@ -1,7 +1,7 @@ ROOT(columns=[('order_key', o_orderkey), ('max_ratio', max_ratio)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 
'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_on_function_call.txt b/tests/test_plan_refsols/aggregate_on_function_call.txt index 3e68d185b..d28ac7f15 100644 --- a/tests/test_plan_refsols/aggregate_on_function_call.txt +++ b/tests/test_plan_refsols/aggregate_on_function_call.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_nationkey), ('avg_consumer_value', avg_consumer_value)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_consumer_value': t1.avg_consumer_value, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_consumer_value': t1.avg_consumer_value, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_consumer_value': MAX(IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal))}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 8e8ffd889..3f4f8df2b 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', 
avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice_1': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index 8fa56b648..c3fd73955 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == 
t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/aggregation_analytics_1.txt b/tests/test_plan_refsols/aggregation_analytics_1.txt index ba946c021..58e0deb0c 100644 --- a/tests/test_plan_refsols/aggregation_analytics_1.txt +++ b/tests/test_plan_refsols/aggregation_analytics_1.txt @@ -1,16 +1,16 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=8:numeric) - JOIN(condition=t0.ps_partkey == t1.ps_partkey & t0.ps_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.ps_partkey & t0.ps_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t0.p_name, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 1d8f376f1..cb721fe2a 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,9 +1,9 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 
'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index df3d64e66..8d3788bf3 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,9 +1,9 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate.txt b/tests/test_plan_refsols/anti_aggregate.txt index 02458ffdc..b5feeed30 100644 --- a/tests/test_plan_refsols/anti_aggregate.txt +++ b/tests/test_plan_refsols/anti_aggregate.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) 
SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/anti_aggregate_alternate.txt b/tests/test_plan_refsols/anti_aggregate_alternate.txt index 598407ce0..02ee0138c 100644 --- a/tests/test_plan_refsols/anti_aggregate_alternate.txt +++ b/tests/test_plan_refsols/anti_aggregate_alternate.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', 0:numeric), ('sum_price_of_10parts', None:unknown)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/anti_singular.txt 
b/tests/test_plan_refsols/anti_singular.txt index a66ce09a4..117716d5e 100644 --- a/tests/test_plan_refsols/anti_singular.txt +++ b/tests/test_plan_refsols/anti_singular.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('region_name', None:unknown)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/asian_nations.txt b/tests/test_plan_refsols/asian_nations.txt index ad3901411..dc211ee3e 100644 --- a/tests/test_plan_refsols/asian_nations.txt +++ b/tests/test_plan_refsols/asian_nations.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt 
b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt index c08e7d33f..1dbe56459 100644 --- a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt +++ b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt @@ -1,8 +1,8 @@ ROOT(columns=[('region_name', r_name), ('avg_bal_without_debt_erasure', sum_sum_expr_1 / sum_count_expr_1)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t0.r_name, 'sum_count_expr_1': t1.sum_count_expr_1, 'sum_sum_expr_1': t1.sum_sum_expr_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'r_name': t0.r_name, 'sum_count_expr_1': t1.sum_count_expr_1, 'sum_sum_expr_1': t1.sum_sum_expr_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_count_expr_1': SUM(count_expr_1), 'sum_sum_expr_1': SUM(sum_expr_1)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_expr_1': COUNT(LARGEST(c_acctbal, 0:numeric)), 'sum_expr_1': SUM(LARGEST(c_acctbal, 0:numeric))}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/avg_order_diff_per_customer.txt b/tests/test_plan_refsols/avg_order_diff_per_customer.txt index 5a91d29b9..e1703a4fb 100644 --- a/tests/test_plan_refsols/avg_order_diff_per_customer.txt +++ 
b/tests/test_plan_refsols/avg_order_diff_per_customer.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('avg_diff', avg_diff)], orderings=[(avg_diff):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index 7c5488e03..2e05042d5 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 4432b9290..a0a95a823 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': 
c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 4432b9290..a0a95a823 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt 
b/tests/test_plan_refsols/bad_child_reuse_4.txt index 984b4aad1..da2b4c0c0 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index e58165f3d..3feedb723 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 
0:numeric))], orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index ae633b5bc..afee2ad7c 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': 
r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aa.txt b/tests/test_plan_refsols/common_prefix_aa.txt index 71a7e20e6..6ae902b07 100644 --- a/tests/test_plan_refsols/common_prefix_aa.txt +++ b/tests/test_plan_refsols/common_prefix_aa.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': 
r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_ab.txt b/tests/test_plan_refsols/common_prefix_ab.txt index 77d4e5f1b..323768976 100644 --- a/tests/test_plan_refsols/common_prefix_ab.txt +++ b/tests/test_plan_refsols/common_prefix_ab.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ac.txt b/tests/test_plan_refsols/common_prefix_ac.txt index 6693cbeec..6e860bc35 100644 --- a/tests/test_plan_refsols/common_prefix_ac.txt +++ b/tests/test_plan_refsols/common_prefix_ac.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, 
columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index b4202ffb5..7803949a3 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_ae.txt b/tests/test_plan_refsols/common_prefix_ae.txt index 1bc0d2b9f..32e124483 100644 --- a/tests/test_plan_refsols/common_prefix_ae.txt +++ b/tests/test_plan_refsols/common_prefix_ae.txt @@ -1,13 +1,13 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_customers), ('customer_name', customer_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'customer_name': t1.customer_name, 'n_customers': t1.n_customers, 'n_name': t0.n_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'customer_name': t1.customer_name, 'n_customers': t1.n_customers, 'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'customer_name': MAX(c_name), 'n_customers': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) FILTER(condition=ISIN(o_orderkey, [1070368, 1347104, 1472135, 2351457]:array[unknown]), columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/common_prefix_af.txt b/tests/test_plan_refsols/common_prefix_af.txt index c35fff43b..e2a896fca 100644 --- a/tests/test_plan_refsols/common_prefix_af.txt +++ 
b/tests/test_plan_refsols/common_prefix_af.txt @@ -1,14 +1,14 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name', max_c_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_c_name': t1.max_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_c_name': t1.max_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'max_c_name': max_c_name, 'n_rows': n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_name': MAX(c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'c_name': t1.c_name, 'n_rows': t0.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'n_rows': t0.n_rows, 'o_custkey': t0.o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=ISIN(o_orderkey, [1070368, 1347104, 1472135, 2351457]:array[unknown]), columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index 13620bfb8..a3e99e82a 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,19 +1,19 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -22,12 +22,12 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'c_custkey': c_custkey, 
'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': 
t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -37,7 +37,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index bb3062a31..1e859202f 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,10 +1,10 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, 
aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -13,12 +13,12 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -28,7 +28,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index 722878bb4..4448ee711 100644 --- 
a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,21 +1,21 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) 
SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': 
t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': 
n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -25,7 +25,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index 36c32255d..ca09a6735 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,19 +1,19 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', 
ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 
'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': 
t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -22,12 +22,12 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': 
t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 
'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -37,7 +37,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': 
ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 534ab16db..ab4e235dd 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,19 +1,19 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 
'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -22,11 +22,11 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -36,6 +36,6 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index cd75a8794..f7b11e162 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,22 +1,22 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', n_rows_1)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows_1}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], 
partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, 
type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index a10ac09e8..1ed0ccd60 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,17 +1,17 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 
'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) FILTER(condition=sum_agg_3 > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_agg_3': SUM(agg_3)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_3': t1.agg_3, 'o_custkey': t0.o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_3': t1.agg_3, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, 
columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 5b7ff7b75..396066d64 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,21 +1,21 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', agg_1)], orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, 
columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index b6b7a4abb..91bfe0dcf 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,26 +1,26 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric)), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': 
p_partkey, 'p_size': p_size}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_ap.txt b/tests/test_plan_refsols/common_prefix_ap.txt index 15bd3d015..7f58c39d0 100644 --- a/tests/test_plan_refsols/common_prefix_ap.txt +++ b/tests/test_plan_refsols/common_prefix_ap.txt @@ -1,10 +1,10 @@ ROOT(columns=[('part_name', p_name), ('supplier_name', s_name), ('supplier_quantity', ps_availqty), ('supplier_nation', n_name)], orderings=[(p_name):asc_first]) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=p_brand == 'Brand#32':string & p_size == 10:numeric & CONTAINS(p_name, 'pink':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey, 'p_size': p_size}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) + 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index 9f93ea84e..944f37615 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ b/tests/test_plan_refsols/common_prefix_aq.txt @@ -1,13 +1,13 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier', s_name), ('best_part', p_name), ('best_quantity', ps_availqty)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 
'r_name': t0.r_name, 's_name': t1.s_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(n_name):asc_last], allow_ties=False) == 1:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 4936b25c3..74c69984a 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers), ('n_suppliers', n_suppliers)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT(), 'n_suppliers': SUM(n_suppliers)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, 
type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 707a4ade2..26411f439 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,17 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_parts', n_parts)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_parts': t1.sum_agg_22, 'n_suppliers': t1.sum_sum_expr_18, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_parts': t1.sum_agg_22, 'n_suppliers': t1.sum_sum_expr_18, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_22': SUM(agg_22), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_18': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + 
JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 300cb9ba1..c29d975b2 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,14 +1,14 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', DEFAULT_TO(sum_sum_expr_7, 0:numeric)), ('n_orders_95', DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_29': SUM(agg_29), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_29': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_e.txt b/tests/test_plan_refsols/common_prefix_e.txt index da9a367f2..a6acb8f40 100644 --- a/tests/test_plan_refsols/common_prefix_e.txt +++ b/tests/test_plan_refsols/common_prefix_e.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': 
t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index 1b6957518..f47fe1ec5 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_agg_8)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_agg_8': t1.sum_agg_8}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_agg_8': t1.sum_agg_8}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT(), 'sum_agg_8': SUM(agg_8)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_8': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'agg_8': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 1ba3a7526..3db1c35e4 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_suppliers), ('n_nations', sum_agg_2)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 5d1e6d7e0..97ab075cc 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,17 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', sum_n_rows), ('n_parts', sum_agg_22_1), ('n_suppliers', sum_sum_expr_18_1)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_agg_22_1': t1.sum_agg_22_1, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_18_1': t1.sum_sum_expr_18_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_agg_22_1': t1.sum_agg_22_1, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_18_1': t1.sum_sum_expr_18_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': COUNT(), 'sum_agg_22_1': SUM(agg_22), 'sum_n_rows_1': SUM(n_rows), 
'sum_sum_expr_18_1': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index b277cf6c4..fdb64f108 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(n_rows):desc_last, (n_name):asc_first], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, 
columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=MONTH(o_orderdate) == 12:numeric & YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_j.txt b/tests/test_plan_refsols/common_prefix_j.txt index 5cd83f892..f6ba49d6c 100644 --- a/tests/test_plan_refsols/common_prefix_j.txt +++ b/tests/test_plan_refsols/common_prefix_j.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('region_name', r_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_k.txt b/tests/test_plan_refsols/common_prefix_k.txt index 41de2f7c4..8eaecd723 100644 --- a/tests/test_plan_refsols/common_prefix_k.txt +++ b/tests/test_plan_refsols/common_prefix_k.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_name', c_name), 
('region_name', r_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 67dd911ab..166af0404 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,17 +1,17 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': 
t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - 
JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 77d79aae2..1ec0ef2aa 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,15 +1,15 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': 
t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 3faf6e877..e7b6f51be 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,13 +1,13 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_agg_11, 0:numeric))], 
orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, 
aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) @@ -16,8 +16,8 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), 
('n_elements', D FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 42ae08339..199fb1393 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,15 +1,15 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) 
FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_small_parts': sum_sum_agg_5, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_n_rows': sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=sum_sum_agg_5 > 
0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) FILTER(condition=MONTH(l_shipdate) == 
11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) @@ -18,8 +18,8 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', D SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 47e4f9c62..17bd44462 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,12 +1,12 @@ 
ROOT(columns=[('name', c_name), ('n_orders', n_rows), ('n_parts_ordered', n_rows_1), ('n_distinct_parts', ndistinct_l_partkey)], orderings=[(ndistinct_l_partkey / n_rows_1):asc_first, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt b/tests/test_plan_refsols/common_prefix_q.txt index 14b2f6278..f08e39969 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -1,13 +1,13 @@ ROOT(columns=[('name', c_name), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric)), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'p_name': t1.p_name}) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'p_name': t1.p_name}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority 
== '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 8d8562577..f024a6127 100644 --- 
a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,15 +1,15 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_l_extendedprice), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_anything_l_extendedprice': t1.max_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'max_anything_l_extendedprice': t1.max_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'max_anything_l_extendedprice': max_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_l_extendedprice': MAX(anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_n_rows': SUM(n_rows), 'sum_o_totalprice': SUM(o_totalprice)}) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'anything_l_extendedprice': t1.anything_l_extendedprice, 'anything_p_name': t1.anything_p_name, 'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice}) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_l_extendedprice': t1.anything_l_extendedprice, 'anything_p_name': 
t1.anything_p_name, 'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_l_extendedprice': ANYTHING(l_extendedprice), 'anything_p_name': ANYTHING(p_name), 'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 
'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index da50bc6ba..85f114b4e 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', n_rows), ('most_recent_order_distinct', ndistinct_l_suppkey)], orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 
'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index 9095c85fb..4592b0af2 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,12 +1,12 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': 
t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index 62a207995..8bb6e9ff8 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,13 +1,13 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_v.txt b/tests/test_plan_refsols/common_prefix_v.txt index 3dc65ce4f..72dfc31e5 100644 --- a/tests/test_plan_refsols/common_prefix_v.txt +++ b/tests/test_plan_refsols/common_prefix_v.txt @@ -1,7 +1,7 @@ 
ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_w.txt b/tests/test_plan_refsols/common_prefix_w.txt index 2d7e3a5d0..b64add79d 100644 --- a/tests/test_plan_refsols/common_prefix_w.txt +++ b/tests/test_plan_refsols/common_prefix_w.txt @@ -1,7 +1,7 @@ ROOT(columns=[('key', o_orderkey), ('cust_nation_name', n_name)], orderings=[(o_orderkey):asc_first], limit=5:numeric) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - 
JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index d738602e4..6283b3bc5 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index df9fcfbf2..2bff6eeda 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'c_name': c_name, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_z.txt b/tests/test_plan_refsols/common_prefix_z.txt index 
23dd535b3..3b3b5fad6 100644 --- a/tests/test_plan_refsols/common_prefix_z.txt +++ b/tests/test_plan_refsols/common_prefix_z.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index 4a09564e2..71b6a0111 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('n_prefix_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_10.txt b/tests/test_plan_refsols/correl_10.txt index b94e2bf45..fcb05e48c 100644 --- a/tests/test_plan_refsols/correl_10.txt +++ b/tests/test_plan_refsols/correl_10.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('rname', None:unknown)], orderings=[(n_name):asc_first]) - JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) PROJECT(columns={'expr_0': SLICE(r_name, None:unknown, 1:numeric, None:unknown), 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_11.txt b/tests/test_plan_refsols/correl_11.txt index 7c55e38f7..b2cdc198b 100644 --- a/tests/test_plan_refsols/correl_11.txt +++ b/tests/test_plan_refsols/correl_11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('brand', p_brand)], orderings=[(p_brand):asc_first]) AGGREGATE(keys={'p_brand': p_brand}, aggregations={}) - JOIN(condition=t0.p_brand == t1.p_brand & t1.p_retailprice > 1.4:numeric * t0.avg_price, type=INNER, cardinality=PLURAL_FILTER, columns={'p_brand': t0.p_brand}) + JOIN(condition=t0.p_brand == t1.p_brand & t1.p_retailprice > 1.4:numeric * 
t0.avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_brand': t0.p_brand}) AGGREGATE(keys={'p_brand': p_brand}, aggregations={'avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_12.txt b/tests/test_plan_refsols/correl_12.txt index 84f30b21f..3fceabdc2 100644 --- a/tests/test_plan_refsols/correl_12.txt +++ b/tests/test_plan_refsols/correl_12.txt @@ -1,7 +1,7 @@ ROOT(columns=[('brand', p_brand)], orderings=[(p_brand):asc_first]) AGGREGATE(keys={'p_brand': p_brand}, aggregations={}) - JOIN(condition=t1.p_retailprice < t0.global_avg_price & t0.p_brand == t1.p_brand & t1.p_retailprice > t0.brand_avg_price, type=INNER, cardinality=PLURAL_FILTER, columns={'p_brand': t0.p_brand}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'brand_avg_price': t1.brand_avg_price, 'global_avg_price': t0.global_avg_price, 'p_brand': t1.p_brand}) + JOIN(condition=t1.p_retailprice < t0.global_avg_price & t0.p_brand == t1.p_brand & t1.p_retailprice > t0.brand_avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_brand': t0.p_brand}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'brand_avg_price': t1.brand_avg_price, 'global_avg_price': t0.global_avg_price, 'p_brand': t1.p_brand}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) AGGREGATE(keys={'p_brand': p_brand}, aggregations={'brand_avg_price': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/correl_13.txt b/tests/test_plan_refsols/correl_13.txt index 035b6138f..5a738e623 100644 --- a/tests/test_plan_refsols/correl_13.txt +++ b/tests/test_plan_refsols/correl_13.txt @@ 
-1,10 +1,10 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) FILTER(condition=s_nationkey <= 3:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index e03669bed..b8d16913a 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,12 +1,12 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'ps_partkey': 
t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) 
SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index b88f0b314..d821dbbe7 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,15 +1,15 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.global_avg_price * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t1.p_retailprice < t0.global_avg_price * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'global_avg_price': 
t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/correl_16.txt b/tests/test_plan_refsols/correl_16.txt index 06cce05d8..f26b6c5e9 100644 --- a/tests/test_plan_refsols/correl_16.txt +++ 
b/tests/test_plan_refsols/correl_16.txt @@ -1,9 +1,9 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) FILTER(condition=s_nationkey == n_nationkey & PERCENTILE(args=[], partition=[c_nationkey, s_suppkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last], n_buckets=10000) == tile, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'n_nationkey': t0.n_nationkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'n_nationkey': t0.n_nationkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) PROJECT(columns={'s_nationkey': s_nationkey, 
's_suppkey': s_suppkey, 'tile': PERCENTILE(args=[], partition=[], order=[(s_acctbal):asc_last, (s_suppkey):asc_last], n_buckets=10000)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index b432479aa..7c1c086c4 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ b/tests/test_plan_refsols/correl_17.txt @@ -1,4 +1,4 @@ ROOT(columns=[('fullname', JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name)))], orderings=[(JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name))):asc_first]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 8f05634b3..75910fb98 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', sum_n_above_avg)], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_n_above_avg': COUNT()}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_totalprice >= 0.5:numeric * DEFAULT_TO(t0.sum_o_totalprice, 0:numeric), type=INNER, cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_totalprice >= 0.5:numeric * DEFAULT_TO(t0.sum_o_totalprice, 0:numeric), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={}) FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_19.txt b/tests/test_plan_refsols/correl_19.txt index 607b17293..453727b40 100644 --- a/tests/test_plan_refsols/correl_19.txt +++ b/tests/test_plan_refsols/correl_19.txt @@ -1,7 +1,7 @@ ROOT(columns=[('supplier_name', supplier_name), ('n_super_cust', n_super_cust)], orderings=[(n_super_cust):desc_last], limit=5:numeric) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'n_super_cust': COUNT(), 'supplier_name': ANYTHING(s_name)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_2.txt b/tests/test_plan_refsols/correl_2.txt index c0beb2eaa..7bfc095a3 100644 --- a/tests/test_plan_refsols/correl_2.txt +++ b/tests/test_plan_refsols/correl_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('n_selected_custs', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(n_name):asc_first]) - JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == t1.expr_1 & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == t1.expr_1 & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index 44ce5aa60..29e9f7d0a 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,10 +1,10 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, 
cardinality=SINGULAR_FILTER, columns={}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/correl_21.txt b/tests/test_plan_refsols/correl_21.txt index 82b36b594..8a6cdde8d 100644 --- 
a/tests/test_plan_refsols/correl_21.txt +++ b/tests/test_plan_refsols/correl_21.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_sizes': COUNT()}) - JOIN(condition=t1.n_rows > t0.avg_n_parts, type=INNER, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t1.n_rows > t0.avg_n_parts, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) AGGREGATE(keys={}, aggregations={'avg_n_parts': AVG(n_parts)}) AGGREGATE(keys={'p_size': p_size}, aggregations={'n_parts': COUNT()}) SCAN(table=tpch.PART, columns={'p_size': p_size}) diff --git a/tests/test_plan_refsols/correl_22.txt b/tests/test_plan_refsols/correl_22.txt index 99d34fe66..aad7e12c4 100644 --- a/tests/test_plan_refsols/correl_22.txt +++ b/tests/test_plan_refsols/correl_22.txt @@ -1,6 +1,6 @@ ROOT(columns=[('container', p_container), ('n_types', n_types)], orderings=[(n_types):desc_last, (p_container):asc_first], limit=5:numeric) AGGREGATE(keys={'p_container': p_container}, aggregations={'n_types': COUNT()}) - JOIN(condition=t1.avg_p_retailprice > t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_container': t1.p_container}) + JOIN(condition=t1.avg_p_retailprice > t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_container': t1.p_container}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) AGGREGATE(keys={'p_container': p_container, 'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/correl_23.txt b/tests/test_plan_refsols/correl_23.txt index 0538d5363..8622ac925 100644 --- a/tests/test_plan_refsols/correl_23.txt +++ b/tests/test_plan_refsols/correl_23.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_sizes': COUNT()}) - 
JOIN(condition=t1.n_rows > t0.avg_n_combo, type=INNER, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t1.n_rows > t0.avg_n_combo, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) AGGREGATE(keys={}, aggregations={'avg_n_combo': AVG(n_combos)}) AGGREGATE(keys={'p_size': p_size}, aggregations={'n_combos': COUNT()}) AGGREGATE(keys={'p_container': p_container, 'p_size': p_size, 'p_type': p_type}, aggregations={}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index 0957f70e7..d5b649cbd 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,6 +1,6 @@ ROOT(columns=[('year', year), ('month', month), ('n_orders_in_range', n_orders_in_range)], orderings=[(year):asc_first, (month):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_orders_in_range': COUNT()}) - JOIN(condition=t0.month == MONTH(t1.o_orderdate) & t0.year == YEAR(t1.o_orderdate) & MONOTONIC(t0.prev_month_avg_price, t1.o_totalprice, t0.avg_o_totalprice) | MONOTONIC(t0.avg_o_totalprice, t1.o_totalprice, t0.prev_month_avg_price), type=INNER, cardinality=PLURAL_FILTER, columns={'month': t0.month, 'year': t0.year}) + JOIN(condition=t0.month == MONTH(t1.o_orderdate) & t0.year == YEAR(t1.o_orderdate) & MONOTONIC(t0.prev_month_avg_price, t1.o_totalprice, t0.avg_o_totalprice) | MONOTONIC(t0.avg_o_totalprice, t1.o_totalprice, t0.prev_month_avg_price), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) FILTER(condition=YEAR(o_orderdate) < 1994:numeric, 
columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_25.txt b/tests/test_plan_refsols/correl_25.txt index f34df3cdf..e71563e9c 100644 --- a/tests/test_plan_refsols/correl_25.txt +++ b/tests/test_plan_refsols/correl_25.txt @@ -1,10 +1,10 @@ ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', r_regionkey), ('cust_nation_name', anything_n_name), ('cust_nation_key', n_nationkey), ('customer_name', anything_c_name), ('n_urgent_semi_domestic_rail_orders', n_urgent_semi_domestic_rail_orders)], orderings=[(n_urgent_semi_domestic_rail_orders):desc_last, (anything_c_name):asc_first], limit=5:numeric) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name), 'n_urgent_semi_domestic_rail_orders': NDISTINCT(l_orderkey)}) - JOIN(condition=t1.n_name != t0.n_name & t0.l_suppkey == t1.s_suppkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 
'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t1.n_name != t0.n_name & t0.l_suppkey == t1.s_suppkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) @@ -12,8 +12,8 @@ ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', r_regio SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index 7a1ba0639..977ddb095 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -1,10 +1,10 @@ ROOT(columns=[('nation_name', nation_name), 
('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': COUNT(), 'nation_name': ANYTHING(n_name)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -12,6 +12,6 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_ FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 2de29c99b..4e8bf9fda 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -1,16 +1,16 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows)], orderings=[(anything_n_name):asc_first]) - JOIN(condition=t0.anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'n_rows': t0.n_rows}) + JOIN(condition=t0.anything_n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'n_rows': t0.n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 2de29c99b..a4726946f 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -1,16 +1,16 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows)], orderings=[(anything_n_name):asc_first]) - JOIN(condition=t0.anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'n_rows': t0.n_rows}) + JOIN(condition=t0.anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 
'n_rows': t0.n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) 
SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 7f4bc0b33..fac5389cf 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,10 +1,10 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_rows_1), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': 
t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) @@ -12,8 +12,8 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_n AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': 
s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_3.txt b/tests/test_plan_refsols/correl_3.txt index 46a93a3d4..efeaf72d2 100644 --- a/tests/test_plan_refsols/correl_3.txt +++ b/tests/test_plan_refsols/correl_3.txt @@ -1,11 +1,11 @@ ROOT(columns=[('region_name', r_name), ('n_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=r_regionkey == anything_n_regionkey, columns={'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey)}) - JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 1ce81c590..cda02872f 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,9 +1,9 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 
'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) @@ -11,9 +11,9 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_supp_acctbal': 
t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index 6da351356..684d60c2f 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,10 +1,10 @@ ROOT(columns=[('nation_name', nation_name), ('mean_rev', mean_rev), ('median_rev', median_rev)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(l_extendedprice * 1:numeric - l_discount), 'median_rev': MEDIAN(l_extendedprice * 1:numeric - l_discount), 'nation_name': ANYTHING(n_name)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_32.txt b/tests/test_plan_refsols/correl_32.txt index 70a37ff43..1bd7b713a 100644 --- a/tests/test_plan_refsols/correl_32.txt +++ b/tests/test_plan_refsols/correl_32.txt @@ -1,10 +1,10 @@ ROOT(columns=[('customer_name', c_name), ('delta', ABS(c_acctbal - median_s_acctbal))], orderings=[(ABS(c_acctbal - median_s_acctbal)):asc_first], limit=5:numeric) - JOIN(condition=SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) == t1.expr_1 & t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_name': t0.c_name, 'median_s_acctbal': t1.median_s_acctbal}) + JOIN(condition=SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) == t1.expr_1 & t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_name': t0.c_name, 'median_s_acctbal': t1.median_s_acctbal}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) AGGREGATE(keys={'expr_1': SLICE(s_phone, -1:numeric, None:unknown, None:unknown), 'n_nationkey': n_nationkey}, aggregations={'median_s_acctbal': MEDIAN(s_acctbal)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal, 's_phone': t1.s_phone}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal, 's_phone': t1.s_phone}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'MIDDLE EAST':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_33.txt b/tests/test_plan_refsols/correl_33.txt index 1331a72cf..8e6e60923 100644 --- a/tests/test_plan_refsols/correl_33.txt +++ b/tests/test_plan_refsols/correl_33.txt @@ -1,5 +1,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) - JOIN(condition=MONTH(t0.first_order_date) == t1.expr_3 & YEAR(t0.first_order_date) == t1.expr_2, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=MONTH(t0.first_order_date) == t1.expr_3 & YEAR(t0.first_order_date) == t1.expr_2, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows}) AGGREGATE(keys={}, aggregations={'first_order_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'expr_2': YEAR(o_orderdate), 'expr_3': MONTH(o_orderdate)}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index 5ce2487c5..e10964b38 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ b/tests/test_plan_refsols/correl_34.txt @@ -2,13 +2,13 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={}) FILTER(condition=l_orderkey == o_orderkey & ps_partkey == 
l_partkey & ps_suppkey == l_suppkey & o_totalprice > RELAVG(args=[o_totalprice], partition=[l_linenumber, l_orderkey, ps_partkey, ps_suppkey], order=[]) | RELSIZE(args=[], partition=[l_partkey, l_suppkey], order=[]) == 1:numeric, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, 
columns={'r_regionkey': r_regionkey}) @@ -17,6 +17,6 @@ ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_linestatus': l_linestatus, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_returnflag': l_returnflag, 'l_suppkey': l_suppkey}) FILTER(condition=YEAR(o_orderdate) >= 1995:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_35.txt b/tests/test_plan_refsols/correl_35.txt index ec4994e6d..96ec04e08 100644 --- a/tests/test_plan_refsols/correl_35.txt +++ b/tests/test_plan_refsols/correl_35.txt @@ -1,9 +1,9 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.p_type == t1.p_type & t0.s_nationkey == t1.c_nationkey & t0.o_custkey == t1.c_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=SINGULAR_FILTER, columns={}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) - 
JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) + JOIN(condition=t0.p_type == t1.p_type & t0.s_nationkey == t1.c_nationkey & t0.o_custkey == t1.c_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) @@ -12,10 +12,10 @@ ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}) AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == 
t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git 
a/tests/test_plan_refsols/correl_36.txt b/tests/test_plan_refsols/correl_36.txt index fd87197e6..bb518918c 100644 --- a/tests/test_plan_refsols/correl_36.txt +++ b/tests/test_plan_refsols/correl_36.txt @@ -1,20 +1,20 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.key_12, type=INNER, cardinality=SINGULAR_FILTER, columns={}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.key_12, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey}) FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'key_12': key_12, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey}, aggregations={}) - JOIN(condition=t0.p_type == t1.p_type & t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': 
t0.l_orderkey, 'l_partkey': t1.l_partkey, 'p_type': t0.p_type}) - JOIN(condition=t0.c_custkey == t1.o_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=PLURAL_FILTER, columns={'key_12': t0.o_orderkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey, 'p_type': t0.p_type}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey & t1.c_nationkey == t0.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) + JOIN(condition=t0.p_type == t1.p_type & t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t1.l_partkey, 'p_type': t0.p_type}) + JOIN(condition=t0.c_custkey == t1.o_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_12': t0.o_orderkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey, 'p_type': t0.p_type}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) + JOIN(condition=t0.o_custkey == t1.c_custkey & t1.c_nationkey == t0.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 
'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) @@ -22,7 +22,7 @@ ROOT(columns=[('n', n)], orderings=[]) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_4.txt b/tests/test_plan_refsols/correl_4.txt index 84fb530c3..f8778c6d6 100644 --- a/tests/test_plan_refsols/correl_4.txt +++ b/tests/test_plan_refsols/correl_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', n_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t1.c_acctbal <= t0.smallest_bal + 5.0:numeric & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'smallest_bal': t0.smallest_bal}) + JOIN(condition=t1.c_acctbal <= t0.smallest_bal + 5.0:numeric & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'smallest_bal': t0.smallest_bal}) AGGREGATE(keys={}, aggregations={'smallest_bal': MIN(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_5.txt b/tests/test_plan_refsols/correl_5.txt index 49ce7f7e3..1ae689fe3 100644 --- a/tests/test_plan_refsols/correl_5.txt +++ b/tests/test_plan_refsols/correl_5.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', name)], orderings=[(name):asc_first]) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'name': ANYTHING(r_name)}) - JOIN(condition=t1.s_acctbal <= t0.smallest_bal + 4.0:numeric & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'smallest_bal': t0.smallest_bal}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey, 'smallest_bal': t0.smallest_bal}) + JOIN(condition=t1.s_acctbal <= 
t0.smallest_bal + 4.0:numeric & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'smallest_bal': t0.smallest_bal}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey, 'smallest_bal': t0.smallest_bal}) AGGREGATE(keys={}, aggregations={'smallest_bal': MIN(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_6.txt b/tests/test_plan_refsols/correl_6.txt index b58490dcb..6f798d580 100644 --- a/tests/test_plan_refsols/correl_6.txt +++ b/tests/test_plan_refsols/correl_6.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('n_prefix_nations', n_rows)], orderings=[]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_7.txt 
b/tests/test_plan_refsols/correl_7.txt index d6b8181db..7586ae463 100644 --- a/tests/test_plan_refsols/correl_7.txt +++ b/tests/test_plan_refsols/correl_7.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('n_prefix_nations', 0:numeric)], orderings=[]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_8.txt b/tests/test_plan_refsols/correl_8.txt index e1c6b0062..3f1326205 100644 --- a/tests/test_plan_refsols/correl_8.txt +++ b/tests/test_plan_refsols/correl_8.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('rname', r_name)], orderings=[(n_name):asc_first]) - JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) PROJECT(columns={'expr_0': SLICE(r_name, None:unknown, 1:numeric, None:unknown), 'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': 
r_regionkey}) diff --git a/tests/test_plan_refsols/correl_9.txt b/tests/test_plan_refsols/correl_9.txt index 09b34ed37..ebcfe8b06 100644 --- a/tests/test_plan_refsols/correl_9.txt +++ b/tests/test_plan_refsols/correl_9.txt @@ -1,4 +1,4 @@ ROOT(columns=[('name', n_name), ('rname', r_name)], orderings=[(n_name):asc_first]) - JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt index ed98cb596..38615974e 100644 --- a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt +++ b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('n_top_suppliers', DEFAULT_TO(count_s_suppkey, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, 
aggregations={'count_s_suppkey': COUNT(s_suppkey)}) LIMIT(limit=100:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}, orderings=[(s_acctbal):asc_last]) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index f8c87d703..3c1b317ec 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -1,14 +1,14 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', sum_sum_sum_sum_agg_0), ('total_value', DEFAULT_TO(sum_sum_sum_sum_sum_l_extendedprice, 0:numeric))], orderings=[]) AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year': year}, aggregations={'sum_sum_sum_sum_agg_0': SUM(sum_sum_sum_agg_0), 'sum_sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'sum_sum_sum_agg_0': t0.sum_sum_sum_agg_0, 'sum_sum_sum_sum_l_extendedprice': t0.sum_sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year': t0.year}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'sum_sum_sum_agg_0': t0.sum_sum_sum_agg_0, 'sum_sum_sum_sum_l_extendedprice': t0.sum_sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year': t0.year}) AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year': year}, aggregations={'sum_sum_sum_agg_0': SUM(sum_sum_agg_0), 'sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': 
t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': COUNT(), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt index af5e3ba37..223617b10 100644 --- a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt +++ b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('num_customers', n_rows), ('num_suppliers', agg_3), ('customer_to_supplier_wealth_ratio', DEFAULT_TO(sum_c_acctbal, 0:numeric) / DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_3': t1.n_rows, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_3': t1.n_rows, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/count_single_subcollection.txt b/tests/test_plan_refsols/count_single_subcollection.txt index bc53fc653..2934bd9b0 100644 --- a/tests/test_plan_refsols/count_single_subcollection.txt +++ b/tests/test_plan_refsols/count_single_subcollection.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_nationkey), ('num_customers', num_customers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'num_customers': t1.num_customers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'num_customers': t1.num_customers}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'num_customers': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/cumulative_stock_analysis.txt b/tests/test_plan_refsols/cumulative_stock_analysis.txt index 79ad18038..98ef1fa97 100644 --- a/tests/test_plan_refsols/cumulative_stock_analysis.txt +++ b/tests/test_plan_refsols/cumulative_stock_analysis.txt @@ -1,5 +1,5 @@ ROOT(columns=[('date_time', 
sbTxDateTime), ('txn_within_day', RELSIZE(args=[], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(sbTxDateTime):asc_last], cumulative=True)), ('n_buys_within_day', RELCOUNT(args=[KEEP_IF(sbTxType, sbTxType == 'buy':string)], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(sbTxDateTime):asc_last], cumulative=True)), ('pct_apple_txns', ROUND(100.0:numeric * RELSUM(args=[ISIN(sbTickerSymbol, ['AAPL', 'AMZN']:array[unknown])], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric)), ('share_change', RELSUM(args=[IFF(sbTxType == 'buy':string, sbTxShares, 0:numeric - sbTxShares)], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True)), ('rolling_avg_amount', ROUND(RELAVG(args=[sbTxAmount], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric))], orderings=[(sbTxDateTime):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType}) FILTER(condition=MONTH(sbTxDateTime) == 4:numeric & YEAR(sbTxDateTime) == 2023:numeric & sbTxStatus == 'success':string, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxStatus': sbTxStatus, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTicker, 
columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 16700f289..f147342e9 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', c_name), ('largest_diff', IFF(ABS(min_diff) > max_diff, min_diff, max_diff))], orderings=[(IFF(ABS(min_diff) > max_diff, min_diff, max_diff)):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_diff': MAX(revenue_delta), 'min_diff': MIN(revenue_delta)}) PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': 
t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index 5f000a001..c9265fae6 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('total_recent_value', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last], limit=3:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/customers_sum_line_price.txt b/tests/test_plan_refsols/customers_sum_line_price.txt index 5b37d11f5..082a9e3e7 100644 --- a/tests/test_plan_refsols/customers_sum_line_price.txt +++ b/tests/test_plan_refsols/customers_sum_line_price.txt @@ -1,8 +1,8 @@ ROOT(columns=[('okey', c_custkey), ('lsum', 
DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'o_custkey': t0.o_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index 5e509bcd6..add80975e 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,24 +1,24 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', key_5), ('c_bal', c_acctbal), ('cr_bal', account_balance_13), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', c_custkey)], orderings=[(n_name):asc_first], limit=10:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 
'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 
'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': 
t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_nationkey == s_nationkey & RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_regionkey': t0.r_regionkey, 
's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_regionkey': t0.r_regionkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/double_cross.txt b/tests/test_plan_refsols/double_cross.txt index 8a8bc62c0..0b5722202 100644 --- a/tests/test_plan_refsols/double_cross.txt +++ b/tests/test_plan_refsols/double_cross.txt @@ -1,9 +1,9 @@ ROOT(columns=[('wk', ord_wk), ('n_lines', n_rows), ('n_orders', anything_n_orders), ('lpo', ROUND(RELSUM(args=[n_rows], partition=[], 
order=[(line_wk):asc_last], cumulative=True) / RELSUM(args=[anything_n_orders], partition=[], order=[(ord_wk):asc_last], cumulative=True), 4:numeric))], orderings=[(ord_wk):asc_first]) AGGREGATE(keys={'line_wk': DATEDIFF('week':string, min_date, l_receiptdate), 'ord_wk': ord_wk}, aggregations={'anything_n_orders': ANYTHING(n_orders), 'n_rows': COUNT()}) - JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.l_receiptdate) < 10:numeric & t0.ord_wk == DATEDIFF('week':string, t0.min_date, t1.l_receiptdate), type=INNER, cardinality=PLURAL_FILTER, columns={'l_receiptdate': t1.l_receiptdate, 'min_date': t0.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'min_date': t1.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) + JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.l_receiptdate) < 10:numeric & t0.ord_wk == DATEDIFF('week':string, t0.min_date, t1.l_receiptdate), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t1.l_receiptdate, 'min_date': t0.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'min_date': t1.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) AGGREGATE(keys={'ord_wk': DATEDIFF('week':string, min_date, o_orderdate)}, aggregations={'n_orders': COUNT()}) - JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.o_orderdate) < 10:numeric, type=INNER, cardinality=PLURAL_FILTER, columns={'min_date': t0.min_date, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.o_orderdate) < 10:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'min_date': t0.min_date, 'o_orderdate': t1.o_orderdate}) AGGREGATE(keys={}, aggregations={'min_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': 
o_orderdate}) FILTER(condition=o_orderpriority == '1-URGENT':string & o_orderstatus == 'F':string, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/dumb_aggregation.txt b/tests/test_plan_refsols/dumb_aggregation.txt index 2a9906b01..e8c22ab3d 100644 --- a/tests/test_plan_refsols/dumb_aggregation.txt +++ b/tests/test_plan_refsols/dumb_aggregation.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('a1', r_name), ('a2', r_name), ('a3', DEFAULT_TO(r_regionkey, 0:numeric)), ('a4', IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric)), ('a5', 1:numeric), ('a6', r_regionkey), ('a7', r_name), ('a8', r_regionkey)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) LIMIT(limit=2:numeric, columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index 6594b25a9..63f0f6776 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,13 +1,13 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEAR(ev_dt)), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first], limit=6:numeric) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': 
t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, 
columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/epoch_event_gap_per_era.txt b/tests/test_plan_refsols/epoch_event_gap_per_era.txt index 9859f20d3..e766e591b 100644 --- a/tests/test_plan_refsols/epoch_event_gap_per_era.txt +++ b/tests/test_plan_refsols/epoch_event_gap_per_era.txt @@ -2,6 +2,6 @@ ROOT(columns=[('era_name', er_name), ('avg_event_gap', avg_event_gap)], ordering AGGREGATE(keys={'er_name': er_name}, aggregations={'anything_er_start_year': ANYTHING(er_start_year), 'avg_event_gap': AVG(day_gap)}) FILTER(condition=er_start_year <= YEAR(ev_dt) & YEAR(ev_dt) < er_end_year, columns={'day_gap': day_gap, 'er_name': er_name, 'er_start_year': er_start_year}) PROJECT(columns={'day_gap': DATEDIFF('days':string, PREV(args=[ev_dt], partition=[er_name, er_name], order=[(ev_dt):asc_last]), ev_dt), 'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year, 'ev_dt': ev_dt}) - JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, columns={'er_end_year': t0.er_end_year, 'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt}) + JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'er_end_year': t0.er_end_year, 'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt}) diff --git a/tests/test_plan_refsols/epoch_events_per_season.txt b/tests/test_plan_refsols/epoch_events_per_season.txt index eaf3e6df1..3e35da6bd 100644 --- a/tests/test_plan_refsols/epoch_events_per_season.txt +++ b/tests/test_plan_refsols/epoch_events_per_season.txt @@ -1,5 +1,5 @@ ROOT(columns=[('season_name', s_name), ('n_events', n_events)], orderings=[(n_events):desc_last, (s_name):asc_first]) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_events': COUNT()}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt}) diff --git a/tests/test_plan_refsols/epoch_first_event_per_era.txt b/tests/test_plan_refsols/epoch_first_event_per_era.txt index 14940bc77..a4a1793d0 100644 --- a/tests/test_plan_refsols/epoch_first_event_per_era.txt +++ b/tests/test_plan_refsols/epoch_first_event_per_era.txt @@ -1,5 +1,5 @@ ROOT(columns=[('era_name', er_name), ('event_name', ev_name)], orderings=[(er_start_year):asc_first]) FILTER(condition=RANKING(args=[], partition=[er_name], order=[(ev_dt):asc_last], allow_ties=False) == 1:numeric, columns={'er_name': er_name, 'er_start_year': er_start_year, 'ev_name': ev_name}) - JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & 
YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, columns={'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt, 'ev_name': t1.ev_name}) + JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt, 'ev_name': t1.ev_name}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index dbbfed385..471d3e119 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,22 +1,22 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) + JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) - JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_FILTER, 
columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) + JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) + JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) + 
JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(name_9 == s_name)}) - JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.s_name, 's_name': t0.s_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) + JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'name_9': t1.s_name, 's_name': t0.s_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt b/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt index 65a82f60a..9473b9fc4 100644 --- a/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt +++ b/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt @@ -1,6 +1,6 @@ ROOT(columns=[('tod', t_name), ('search_engine', search_engine), ('n_searches', n_searches)], orderings=[(t_name):asc_first]) FILTER(condition=RANKING(args=[], partition=[t_name], order=[(n_searches):desc_first, (search_engine):asc_last], allow_ties=False) == 1:numeric, columns={'n_searches': n_searches, 'search_engine': search_engine, 't_name': t_name}) AGGREGATE(keys={'search_engine': search_engine, 't_name': t_name}, aggregations={'n_searches': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_engine': t1.search_engine, 't_name': t0.t_name}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'search_engine': t1.search_engine, 't_name': t0.t_name}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) SCAN(table=SEARCHES, 
columns={'search_engine': search_engine, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt b/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt index 5c5133e8c..2638541a6 100644 --- a/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt +++ b/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt @@ -1,8 +1,8 @@ ROOT(columns=[('region', user_region), ('event_type', ev_typ), ('n_searches', n_searches)], orderings=[]) FILTER(condition=RANKING(args=[], partition=[user_region], order=[(n_searches):desc_first], allow_ties=False) == 1:numeric, columns={'ev_typ': ev_typ, 'n_searches': n_searches, 'user_region': user_region}) AGGREGATE(keys={'ev_typ': ev_typ, 'user_region': user_region}, aggregations={'n_searches': NDISTINCT(search_id)}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_typ': t0.ev_typ, 'search_id': t0.search_id, 'user_region': t1.user_region}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_typ': t0.ev_typ, 'search_id': t1.search_id, 'search_user_id': t1.search_user_id}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ev_typ': t0.ev_typ, 'search_id': t0.search_id, 'user_region': t1.user_region}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_typ': t0.ev_typ, 'search_id': t1.search_id, 'search_user_id': t1.search_user_id}) SCAN(table=EVENTS, columns={'ev_name': ev_name, 'ev_typ': ev_typ}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) SCAN(table=USERS, columns={'user_id': user_id, 'user_region': user_region}) diff --git a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt 
b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt index 09744a70d..225fc5ac2 100644 --- a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt +++ b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n_events', n_events)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_events': COUNT()}) - JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, cardinality=SINGULAR_FILTER, columns={}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, columns={'ev_key': t0.ev_key}) + JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=t_name == 'Pre-Dawn':string, columns={'t_end_hour': t_end_hour, 't_start_hour': t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, columns={'ev_key': t0.ev_key}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=er_name == 'Cold War':string, columns={'er_end_year': er_end_year, 'er_start_year': er_start_year}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt 
b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt index 2bd8dd3ae..3c0b8dc3a 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt @@ -1,9 +1,9 @@ ROOT(columns=[('user_name', anything_user_name), ('n_other_users', n_other_users)], orderings=[(n_other_users):desc_last, (anything_user_name):asc_first], limit=7:numeric) AGGREGATE(keys={'user_id': user_id}, aggregations={'anything_user_name': ANYTHING(user_name), 'n_other_users': NDISTINCT(user_id_11)}) - JOIN(condition=t1.user_name != t0.user_name & t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_FILTER, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t1.user_name != t0.user_name & t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) SCAN(table=EVENTS, columns={'ev_name': ev_name}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt index f7b32c767..b918f541e 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt @@ -2,10 +2,10 @@ ROOT(columns=[('user_name', anything_anything_user_name), ('n_searches', n_searc AGGREGATE(keys={'user_id': user_id}, aggregations={'anything_anything_user_name': ANYTHING(anything_user_name), 'n_searches': COUNT()}) FILTER(condition=user_id == anything_search_user_id, columns={'anything_user_name': anything_user_name, 'user_id': user_id}) AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id), 'anything_user_name': ANYTHING(user_name)}) - JOIN(condition=t1.user_name != t0.user_name & t0.user_id_8 == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_id_8': t1.search_user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), 
LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t1.user_name != t0.user_name & t0.user_id_8 == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_id_8': t1.search_user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) SCAN(table=EVENTS, columns={'ev_name': ev_name}) diff --git a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt index 
945bd82c2..88b48990a 100644 --- a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt +++ b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt @@ -1,5 +1,5 @@ ROOT(columns=[('tod', t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_start_hour': ANYTHING(t_start_hour), 'n_rows': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) SCAN(table=SEARCHES, columns={'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_search_results_by_tod.txt b/tests/test_plan_refsols/epoch_search_results_by_tod.txt index 7334ed1ff..a4f00ca05 100644 --- a/tests/test_plan_refsols/epoch_search_results_by_tod.txt +++ b/tests/test_plan_refsols/epoch_search_results_by_tod.txt @@ -1,5 +1,5 @@ ROOT(columns=[('tod', t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric)), ('avg_results', ROUND(avg_search_num_results, 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_start_hour': ANYTHING(t_start_hour), 'avg_search_num_results': AVG(search_num_results), 'n_rows': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, columns={'search_num_results': t1.search_num_results, 't_name': t0.t_name, 't_start_hour': 
t0.t_start_hour}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'search_num_results': t1.search_num_results, 't_name': t0.t_name, 't_start_hour': t0.t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) SCAN(table=SEARCHES, columns={'search_num_results': search_num_results, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_summer_events_per_type.txt b/tests/test_plan_refsols/epoch_summer_events_per_type.txt index d5e60f4f4..619b2e3a9 100644 --- a/tests/test_plan_refsols/epoch_summer_events_per_type.txt +++ b/tests/test_plan_refsols/epoch_summer_events_per_type.txt @@ -1,6 +1,6 @@ ROOT(columns=[('event_type', ev_typ), ('n_events', n_events)], orderings=[(ev_typ):asc_first]) AGGREGATE(keys={'ev_typ': ev_typ}, aggregations={'n_events': COUNT()}) - JOIN(condition=MONTH(t0.ev_dt) == t1.first_month | MONTH(t0.ev_dt) == t1.second_month | MONTH(t0.ev_dt) == t1.third_month, type=INNER, cardinality=SINGULAR_FILTER, columns={'ev_typ': t0.ev_typ}) + JOIN(condition=MONTH(t0.ev_dt) == t1.first_month | MONTH(t0.ev_dt) == t1.second_month | MONTH(t0.ev_dt) == t1.third_month, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_typ': t0.ev_typ}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_typ': ev_typ}) FILTER(condition=s_name == 'Summer':string, columns={'first_month': s_month1, 'second_month': s_month2, 'third_month': s_month3}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt index 8e972942e..09e213e3f 100644 --- a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt +++ b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt @@ 
-1,9 +1,9 @@ ROOT(columns=[('engine', search_engine), ('n_users', DEFAULT_TO(ndistinct_user_id, 0:numeric))], orderings=[(search_engine):asc_first]) - JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ndistinct_user_id': t1.ndistinct_user_id, 'search_engine': t0.search_engine}) + JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ndistinct_user_id': t1.ndistinct_user_id, 'search_engine': t0.search_engine}) AGGREGATE(keys={'search_engine': search_engine}, aggregations={}) SCAN(table=SEARCHES, columns={'search_engine': search_engine}) AGGREGATE(keys={'search_engine': search_engine}, aggregations={'ndistinct_user_id': NDISTINCT(user_id)}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'search_engine': t0.search_engine, 'user_id': t1.user_id}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'search_engine': t0.search_engine, 'user_id': t1.user_id}) FILTER(condition=MONOTONIC(2010:numeric, YEAR(search_ts), 2019:numeric), columns={'search_engine': search_engine, 'search_user_id': search_user_id}) SCAN(table=SEARCHES, columns={'search_engine': search_engine, 'search_ts': search_ts, 'search_user_id': search_user_id}) SCAN(table=USERS, columns={'user_id': user_id}) diff --git a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt index 32fa17508..3115b76b7 100644 --- a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt +++ b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt @@ -1,10 +1,10 @@ ROOT(columns=[('user_name', user_name), ('n_cold_war_searches', n_cold_war_searches)], orderings=[(n_cold_war_searches):desc_last, (user_name):asc_first], limit=3:numeric) - JOIN(condition=t0.user_id == 
t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) AGGREGATE(keys={'anything_search_user_id': anything_search_user_id}, aggregations={'n_cold_war_searches': COUNT()}) AGGREGATE(keys={'search_id': search_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id)}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) FILTER(condition=er_name == 'Cold War':string, columns={'er_end_year': er_end_year, 'er_start_year': er_start_year}) diff --git a/tests/test_plan_refsols/first_order_per_customer.txt b/tests/test_plan_refsols/first_order_per_customer.txt index ef68303cf..3842761d5 
100644 --- a/tests/test_plan_refsols/first_order_per_customer.txt +++ b/tests/test_plan_refsols/first_order_per_customer.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('first_order_date', o_orderdate), ('first_order_price', o_totalprice)], orderings=[(o_totalprice):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) FILTER(condition=c_acctbal >= 9000.0:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/function_sampler.txt b/tests/test_plan_refsols/function_sampler.txt index f6aa4aa45..5ab472869 100644 --- a/tests/test_plan_refsols/function_sampler.txt +++ b/tests/test_plan_refsols/function_sampler.txt @@ -1,6 +1,6 @@ ROOT(columns=[('a', JOIN_STRINGS('-':string, r_name, n_name, SLICE(c_name, 16:numeric, None:unknown, None:unknown))), ('b', ROUND(c_acctbal, 1:numeric)), ('c', KEEP_IF(c_name, SLICE(c_phone, None:unknown, 1:numeric, None:unknown) == '3':string)), ('d', PRESENT(KEEP_IF(c_name, SLICE(c_phone, 1:numeric, 2:numeric, None:unknown) == '1':string))), ('e', ABSENT(KEEP_IF(c_name, SLICE(c_phone, 14:numeric, None:unknown, None:unknown) == '7':string))), ('f', ROUND(c_acctbal))], orderings=[(c_address):asc_first], limit=10:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=MONOTONIC(0.0:numeric, c_acctbal, 100.0:numeric), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/global_aggfunc_backref.txt b/tests/test_plan_refsols/global_aggfunc_backref.txt index 1a899ced4..2bfe14f1c 100644 --- a/tests/test_plan_refsols/global_aggfunc_backref.txt +++ b/tests/test_plan_refsols/global_aggfunc_backref.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('is_above_avg', p_retailprice > avg_price)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_price': t0.avg_price, 'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_price': t0.avg_price, 'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice}) AGGREGATE(keys={}, aggregations={'avg_price': AVG(p_retailprice)}) 
SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/global_aggfuncs_multiple_children.txt b/tests/test_plan_refsols/global_aggfuncs_multiple_children.txt index b4f73f77c..6b8378233 100644 --- a/tests/test_plan_refsols/global_aggfuncs_multiple_children.txt +++ b/tests/test_plan_refsols/global_aggfuncs_multiple_children.txt @@ -1,6 +1,6 @@ ROOT(columns=[('num_cust', num_cust), ('num_supp', num_supp), ('num_part', num_part)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'num_cust': t0.num_cust, 'num_part': t1.num_part, 'num_supp': t0.num_supp}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'num_cust': t0.num_cust, 'num_supp': t1.num_supp}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'num_cust': t0.num_cust, 'num_part': t1.num_part, 'num_supp': t0.num_supp}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'num_cust': t0.num_cust, 'num_supp': t1.num_supp}) AGGREGATE(keys={}, aggregations={'num_cust': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={}) AGGREGATE(keys={}, aggregations={'num_supp': COUNT()}) diff --git a/tests/test_plan_refsols/hour_minute_day.txt b/tests/test_plan_refsols/hour_minute_day.txt index 30ead1632..294688d84 100644 --- a/tests/test_plan_refsols/hour_minute_day.txt +++ b/tests/test_plan_refsols/hour_minute_day.txt @@ -1,5 +1,5 @@ ROOT(columns=[('transaction_id', sbTxId), ('_expr0', HOUR(sbTxDateTime)), ('_expr1', MINUTE(sbTxDateTime)), ('_expr2', SECOND(sbTxDateTime))], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, 
type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/join_asia_region_nations.txt b/tests/test_plan_refsols/join_asia_region_nations.txt index 86200b10a..78532832c 100644 --- a/tests/test_plan_refsols/join_asia_region_nations.txt +++ b/tests/test_plan_refsols/join_asia_region_nations.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_order_by.txt b/tests/test_plan_refsols/join_order_by.txt index 0d23a654e..e84c9ff03 100644 --- a/tests/test_plan_refsols/join_order_by.txt +++ b/tests/test_plan_refsols/join_order_by.txt @@ -1,4 +1,4 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(n_name):desc_last]) - 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_order_by_back_reference.txt b/tests/test_plan_refsols/join_order_by_back_reference.txt index f92cd6f15..c2fa8442c 100644 --- a/tests/test_plan_refsols/join_order_by_back_reference.txt +++ b/tests/test_plan_refsols/join_order_by_back_reference.txt @@ -1,4 +1,4 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(r_name):desc_last]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_order_by_pruned_back_reference.txt b/tests/test_plan_refsols/join_order_by_pruned_back_reference.txt index c3325f51b..ad9495668 100644 --- a/tests/test_plan_refsols/join_order_by_pruned_back_reference.txt +++ b/tests/test_plan_refsols/join_order_by_pruned_back_reference.txt @@ -1,4 +1,4 @@ ROOT(columns=[('nation_name', n_name)], orderings=[(r_name):desc_last]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_region_nations.txt b/tests/test_plan_refsols/join_region_nations.txt index 7e6add7b1..c319d0a25 100644 --- a/tests/test_plan_refsols/join_region_nations.txt +++ b/tests/test_plan_refsols/join_region_nations.txt @@ -1,4 +1,4 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_region_nations_customers.txt b/tests/test_plan_refsols/join_region_nations_customers.txt index 66c4c5236..03148c27c 100644 --- a/tests/test_plan_refsols/join_region_nations_customers.txt +++ b/tests/test_plan_refsols/join_region_nations_customers.txt @@ -1,6 +1,6 @@ ROOT(columns=[('key', c_custkey), ('name', c_name), ('address', c_address), ('nation_key', c_nationkey), ('phone', c_phone), ('account_balance', c_acctbal), ('market_segment', c_mktsegment), ('comment', c_comment)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_comment': 
t1.c_comment, 'c_custkey': t1.c_custkey, 'c_mktsegment': t1.c_mktsegment, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'c_phone': t1.c_phone}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_comment': t1.c_comment, 'c_custkey': t1.c_custkey, 'c_mktsegment': t1.c_mktsegment, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'c_phone': t1.c_phone}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/join_topk.txt b/tests/test_plan_refsols/join_topk.txt index f92640023..b5fed4ca0 100644 --- a/tests/test_plan_refsols/join_topk.txt +++ b/tests/test_plan_refsols/join_topk.txt @@ -1,4 +1,4 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(n_name):asc_last], limit=10:numeric) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': 
n_regionkey}) diff --git a/tests/test_plan_refsols/lineitem_regional_shipments.txt b/tests/test_plan_refsols/lineitem_regional_shipments.txt index 7a9764497..ba505efe5 100644 --- a/tests/test_plan_refsols/lineitem_regional_shipments.txt +++ b/tests/test_plan_refsols/lineitem_regional_shipments.txt @@ -1,17 +1,17 @@ ROOT(columns=[('rname', r_name), ('price', l_extendedprice)], orderings=[]) - JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/lineitem_regional_shipments2.txt b/tests/test_plan_refsols/lineitem_regional_shipments2.txt index 711bcecdf..b79ec8de3 100644 --- a/tests/test_plan_refsols/lineitem_regional_shipments2.txt +++ b/tests/test_plan_refsols/lineitem_regional_shipments2.txt @@ -1,17 +1,17 @@ ROOT(columns=[('rname', r_name), ('price', l_extendedprice)], orderings=[]) - JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'r_name': t1.r_name}) + JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'r_name': t1.r_name}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'r_name': t1.r_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) + JOIN(condition=t0.s_nationkey == 
t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/lineitem_regional_shipments3.txt b/tests/test_plan_refsols/lineitem_regional_shipments3.txt index 80e53fe22..92972470c 100644 --- a/tests/test_plan_refsols/lineitem_regional_shipments3.txt +++ b/tests/test_plan_refsols/lineitem_regional_shipments3.txt @@ -1,12 +1,12 @@ ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'r_comment': t1.r_comment, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'r_name': t0.r_name}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t1.o_custkey, 'r_name': t0.r_name}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': 
t1.o_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'r_comment': t1.r_comment, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_nationkey': t1.c_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'r_name': t0.r_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'r_name': t0.r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, 
columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt index c9f1900fe..589daa0bc 100644 --- a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt +++ b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt @@ -1,14 +1,14 @@ ROOT(columns=[('ship_year', YEAR(l_shipdate)), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', l_extendedprice * 1.0:numeric - l_discount)], orderings=[]) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': 
l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 
'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt index 57f92d814..d8e5cd342 100644 --- a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt +++ b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt @@ -1,14 +1,14 @@ ROOT(columns=[('order_key', l_orderkey), ('ship_date', l_shipdate), ('extended_price', l_extendedprice)], orderings=[]) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_shipdate': t0.l_shipdate, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_shipdate': t0.l_shipdate, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=STARTSWITH(p_type, 'ECONOMY':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt index e06f975c8..f1caafb88 100644 --- a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt +++ b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt @@ -1,17 +1,17 @@ ROOT(columns=[('order_year', YEAR(o_orderdate)), ('customer_region_name', r_name), ('customer_nation_name', n_name), ('supplier_region_name', supplier_region_name), ('nation_name', nation_name)], orderings=[]) - 
JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': 
t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 0bce6d3db..132a9adcc 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,6 +1,6 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year_1, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year_1}) + JOIN(condition=t0.year == t1.year_1, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year_1}) FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) diff --git 
a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt index 105e87e4a..84d08983f 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt index 68339d2fd..6b901bb6d 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', count_s_suppkey), ('total_suppliers', count_s_suppkey)], orderings=[]) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index 9f4f4ddaa..2ab7abe26 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index ea1267de4..00be4afea 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,25 +1,25 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', cus_tick_typ_avg_shares), ('cust_tick_avg_shares', cust_tick_avg_shares), ('cust_avg_shares', cust_avg_shares)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType}) - JOIN(condition=t0.sbTxCustId == 
t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) - JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': 
t0.sbTxId, 'sbTxType': t0.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) + JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, 
columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) + 
JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index b58741ac6..2cd523e09 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,19 +1,19 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) - JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == 
t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) + JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'type_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpTickerId': t1.sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpTickerId': t1.sbDpTickerId}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId}) SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & 
t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'ticker_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerSymbol': t0.sbTickerSymbol, 'sbTickerType': t0.sbTickerType}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerSymbol': t0.sbTickerSymbol, 'sbTickerType': t0.sbTickerType}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol, 'sbTickerType': sbTickerType}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index 9b5f5003a..2efeb070b 100644 --- 
a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.cust_max_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.cust_ticker_max_shares, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxId': t1.sbTxId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=t1.sbTxShares < t0.cust_max_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.cust_ticker_max_shares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_max_shares': MAX(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_ticker_max_shares': MAX(sbTxShares)}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 5aea3ab12..3356a5d74 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,13 +1,13 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', 
sum_n_ticker_type_trans_1), ('n_type_trans', sum_n_ticker_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) - JOIN(condition=t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans < 0.2:numeric & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) + JOIN(condition=t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans < 0.2:numeric & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans > 0.8:numeric, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans > 0.8:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 3da2dfe6f..e793349dc 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -1,47 +1,47 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t1.n_ticker_type_trans == 1:numeric | t0.n_cust_type_trans == 1:numeric, type=INNER, 
cardinality=PLURAL_FILTER, columns={'sbTxId': t1.sbTxId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t1.n_ticker_type_trans == 1:numeric | t0.n_cust_type_trans == 1:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_cust_type_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxType': t1.sbTxType}) 
AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxType': 
sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) 
SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/multiple_has_hasnot.txt b/tests/test_plan_refsols/multiple_has_hasnot.txt index ba13f4825..ec80d0ec2 100644 --- a/tests/test_plan_refsols/multiple_has_hasnot.txt +++ b/tests/test_plan_refsols/multiple_has_hasnot.txt @@ -1,22 +1,22 @@ ROOT(columns=[('name', p_name)], orderings=[]) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': 
t0.ps_partkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 
's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'ARGENTINA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index afbab2010..18e86d4d9 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal_1': AVG(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt index 2e2b636f1..a835cca09 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('producer_value', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) 
AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/nation_acctbal_breakdown.txt b/tests/test_plan_refsols/nation_acctbal_breakdown.txt index 977b01033..23594226f 100644 --- a/tests/test_plan_refsols/nation_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/nation_acctbal_breakdown.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_name), ('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_name': t0.n_name, 'n_red_acctbal': t1.n_red_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_name': t0.n_name, 'n_red_acctbal': t1.n_red_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AMERICA':string, 
columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index caee91153..a204db9ef 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -1,12 +1,12 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', o_orderkey), ('order_value', o_totalprice), ('value_percentage', value_percentage)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'value_percentage': t1.value_percentage}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'value_percentage': t1.value_percentage}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) 
PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/nation_name_contains_region_name.txt b/tests/test_plan_refsols/nation_name_contains_region_name.txt index 1dedfd1e3..2244b257e 100644 --- a/tests/test_plan_refsols/nation_name_contains_region_name.txt +++ b/tests/test_plan_refsols/nation_name_contains_region_name.txt @@ -1,4 +1,4 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey & CONTAINS(t1.n_name, t0.r_name), type=INNER, cardinality=PLURAL_FILTER, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey & CONTAINS(t1.n_name, t0.r_name), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': 
t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/nations_access_region.txt b/tests/test_plan_refsols/nations_access_region.txt index d8bdd58af..f9fef55af 100644 --- a/tests/test_plan_refsols/nations_access_region.txt +++ b/tests/test_plan_refsols/nations_access_region.txt @@ -1,4 +1,4 @@ ROOT(columns=[('nation_name', n_name), ('region_name', r_name)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/nations_order_by_num_suppliers.txt b/tests/test_plan_refsols/nations_order_by_num_suppliers.txt index 3a35369e0..81fcffc76 100644 --- a/tests/test_plan_refsols/nations_order_by_num_suppliers.txt +++ b/tests/test_plan_refsols/nations_order_by_num_suppliers.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/nations_region_order_by_name.txt b/tests/test_plan_refsols/nations_region_order_by_name.txt index 0d2e21720..951583d9a 100644 --- a/tests/test_plan_refsols/nations_region_order_by_name.txt +++ b/tests/test_plan_refsols/nations_region_order_by_name.txt @@ -1,4 +1,4 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(n_name):asc_last, (r_name):asc_last]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/nations_sum_line_price.txt b/tests/test_plan_refsols/nations_sum_line_price.txt index 2e514641b..7ec5765c3 100644 --- a/tests/test_plan_refsols/nations_sum_line_price.txt +++ b/tests/test_plan_refsols/nations_sum_line_price.txt @@ -1,9 +1,9 @@ ROOT(columns=[('okey', n_nationkey), ('lsum', DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) diff --git a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt index b7f84c0f5..add0e0f3c 100644 --- a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt +++ b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', 
total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/odate_and_rdate_avggap.txt b/tests/test_plan_refsols/odate_and_rdate_avggap.txt index a556f2e22..84a9d4f04 100644 --- a/tests/test_plan_refsols/odate_and_rdate_avggap.txt +++ b/tests/test_plan_refsols/odate_and_rdate_avggap.txt @@ -1,6 +1,6 @@ ROOT(columns=[('avg_gap', avg_gap)], orderings=[]) AGGREGATE(keys={}, aggregations={'avg_gap': AVG(DATEDIFF('days':string, o_orderdate, SMALLEST(l_commitdate, l_receiptdate)))}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_commitdate': t0.l_commitdate, 'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_commitdate': t0.l_commitdate, 
'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipmode': l_shipmode}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/order_by_before_join.txt b/tests/test_plan_refsols/order_by_before_join.txt index 7e6add7b1..c319d0a25 100644 --- a/tests/test_plan_refsols/order_by_before_join.txt +++ b/tests/test_plan_refsols/order_by_before_join.txt @@ -1,4 +1,4 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/ordered_asian_nations.txt b/tests/test_plan_refsols/ordered_asian_nations.txt index 1dff06885..249b892c5 100644 --- a/tests/test_plan_refsols/ordered_asian_nations.txt +++ b/tests/test_plan_refsols/ordered_asian_nations.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(n_name):asc_last]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/orders_sum_line_price.txt b/tests/test_plan_refsols/orders_sum_line_price.txt index c3c7ca09c..e0220e6ff 100644 --- a/tests/test_plan_refsols/orders_sum_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', o_orderkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt index 139b7720a..455520f65 100644 --- a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt +++ 
b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', o_orderkey), ('lavg', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 568ad6059..915d68fb4 100644 --- a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,9 +1,9 @@ ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', DATEDIFF('days':string, order_date_8, o_orderdate))], orderings=[(DATEDIFF('days':string, order_date_8, o_orderdate)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 
'order_date_8': t1.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'VIETNAM':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt index 3044ced65..d7c5bd416 100644 --- a/tests/test_plan_refsols/pagerank_a1.txt +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -1,10 +1,10 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0': page_rank_0, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 
'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': 
t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt index fb470dd74..9f476f893 100644 --- a/tests/test_plan_refsols/pagerank_a2.txt +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -1,14 +1,14 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_20, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0_20': page_rank_0_20, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_20': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, 
columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source 
!= l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt index ebb6c114c..e67d91867 100644 --- a/tests/test_plan_refsols/pagerank_a6.txt +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -1,30 +1,30 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_590, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0_590': page_rank_0_590, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_590': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_580 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_580': t0.page_rank_0_580, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_580': t0.page_rank_0_580, 'sum_n_target': 
t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_580': t0.page_rank_0_580, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_580': t0.page_rank_0_580, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_580': page_rank_0_580, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_580': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_570 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_570': t0.page_rank_0_570, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_570': t0.page_rank_0_570, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_570': t0.page_rank_0_570, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + 
JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_570': t0.page_rank_0_570, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_570': page_rank_0_570, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_570': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_560 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_560': t0.page_rank_0_560, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_560': t0.page_rank_0_560, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_560': t0.page_rank_0_560, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_560': t0.page_rank_0_560, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), 
columns={'anything_n': anything_n, 'page_rank_0_560': page_rank_0_560, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_560': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_550 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_550': t0.page_rank_0_550, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_550': t0.page_rank_0_550, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_550': t0.page_rank_0_550, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_550': t0.page_rank_0_550, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_550': page_rank_0_550, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_550': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / 
DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': 
t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt index 61662e907..a55bcc47a 100644 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ 
b/tests/test_plan_refsols/pagerank_b3.txt @@ -1,18 +1,18 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_58, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0_58': page_rank_0_58, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_58': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_48 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_48': t0.page_rank_0_48, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_48': t0.page_rank_0_48, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_48': page_rank_0_48, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_48': 0.15:numeric / anything_n + 
0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 
'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt index 3bd771724..cf9b305ab 100644 
--- a/tests/test_plan_refsols/pagerank_c4.txt +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -1,22 +1,22 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_134, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0_134': page_rank_0_134, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_134': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_124 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_124': t0.page_rank_0_124, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_124': t0.page_rank_0_124, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_124': t0.page_rank_0_124, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_124': t0.page_rank_0_124, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_124': page_rank_0_124, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 
'l_target': l_target, 'page_rank_0_124': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_114 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_114': t0.page_rank_0_114, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_114': t0.page_rank_0_114, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_114': t0.page_rank_0_114, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_114': t0.page_rank_0_114, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_114': page_rank_0_114, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_114': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': 
t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt index 409149b59..382de6bab 100644 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -1,26 +1,26 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_286, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), 
columns={'page_rank_0_286': page_rank_0_286, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_286': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_276 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_276': t0.page_rank_0_276, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_276': t0.page_rank_0_276, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_276': page_rank_0_276, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_276': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_266 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_266': t0.page_rank_0_266, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_266': t0.page_rank_0_266, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_266': page_rank_0_266, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_266': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_256 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_256': t0.page_rank_0_256, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_256': t0.page_rank_0_256, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_256': page_rank_0_256, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_256': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 
'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': 
t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_h8.txt b/tests/test_plan_refsols/pagerank_h8.txt index 6c7fe4d18..8551440bf 100644 --- a/tests/test_plan_refsols/pagerank_h8.txt +++ b/tests/test_plan_refsols/pagerank_h8.txt @@ -1,38 +1,38 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_2414, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0_2414': page_rank_0_2414, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_2414': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2404 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2404': t0.page_rank_0_2404, 's_key': t1.s_key, 
'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2404': t0.page_rank_0_2404, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2404': t0.page_rank_0_2404, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2404': t0.page_rank_0_2404, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2404': page_rank_0_2404, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2404': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2394 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2394': t0.page_rank_0_2394, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2394': t0.page_rank_0_2394, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, 
type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2394': t0.page_rank_0_2394, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2394': t0.page_rank_0_2394, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2394': page_rank_0_2394, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2394': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2384 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2384': t0.page_rank_0_2384, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2384': t0.page_rank_0_2384, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2384': t0.page_rank_0_2384, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2384': t0.page_rank_0_2384, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2384': page_rank_0_2384, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2384': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2374 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2374': t0.page_rank_0_2374, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2374': t0.page_rank_0_2374, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2374': t0.page_rank_0_2374, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2374': t0.page_rank_0_2374, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2374': page_rank_0_2374, 's_key': 
s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2374': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2364 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2364': t0.page_rank_0_2364, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2364': t0.page_rank_0_2364, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2364': t0.page_rank_0_2364, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2364': t0.page_rank_0_2364, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2364': page_rank_0_2364, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2364': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2354 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 
's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2354': t0.page_rank_0_2354, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2354': t0.page_rank_0_2354, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2354': t0.page_rank_0_2354, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2354': t0.page_rank_0_2354, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2354': page_rank_0_2354, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2354': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key 
== t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, 
columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/part_cross_part_a.txt b/tests/test_plan_refsols/part_cross_part_a.txt index 94cb27eb1..c4402eb78 100644 --- a/tests/test_plan_refsols/part_cross_part_a.txt +++ b/tests/test_plan_refsols/part_cross_part_a.txt @@ -1,14 +1,14 @@ ROOT(columns=[('state', sbCustState), ('exchange', sbTickerExchange), ('n', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(sbCustState):asc_first, (sbTickerExchange):asc_first]) AGGREGATE(keys={'sbCustState': sbCustState, 'sbTickerExchange': sbTickerExchange}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.sbTickerExchange == 
t1.sbTickerExchange & t0.sbCustId == t1.sbCustId, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbCustState': t1.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) + JOIN(condition=t0.sbTickerExchange == t1.sbTickerExchange & t0.sbCustId == t1.sbCustId, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbCustState': t1.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) AGGREGATE(keys={'sbTickerExchange': sbTickerExchange}, aggregations={}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId, 'sbCustState': sbCustState}) AGGREGATE(keys={'sbCustId': sbCustId, 'sbTickerExchange': sbTickerExchange}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId & t1.sbTickerExchange == t0.sbTickerExchange, type=INNER, cardinality=SINGULAR_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange, 'sbTxTickerId': t1.sbTxTickerId}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId & t1.sbTickerExchange == t0.sbTickerExchange, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) + 
JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) AGGREGATE(keys={'sbTickerExchange': sbTickerExchange}, aggregations={}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) diff --git a/tests/test_plan_refsols/part_cross_part_b.txt b/tests/test_plan_refsols/part_cross_part_b.txt index 8685ad920..f32335bba 100644 --- a/tests/test_plan_refsols/part_cross_part_b.txt +++ b/tests/test_plan_refsols/part_cross_part_b.txt @@ -1,15 +1,15 @@ ROOT(columns=[('state', sbCustState), ('month_of_year', month), ('n', RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[sbCustState], order=[(month):asc_last], cumulative=True))], orderings=[(sbCustState):asc_first, (month):asc_first]) - JOIN(condition=t0.sbCustState == t1.sbCustState & t0.month == t1.month, type=LEFT, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.sbCustState == t1.sbCustState & t0.month == t1.month, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) AGGREGATE(keys={'month': 
DATETIME(sbTxDateTime, 'start of month':string)}, aggregations={}) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) AGGREGATE(keys={'month': month, 'sbCustState': sbCustState}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) - JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) AGGREGATE(keys={'month': DATETIME(sbTxDateTime, 'start of month':string)}, aggregations={}) diff --git a/tests/test_plan_refsols/part_cross_part_c.txt b/tests/test_plan_refsols/part_cross_part_c.txt index 3f025b594..9062336ea 100644 --- a/tests/test_plan_refsols/part_cross_part_c.txt +++ b/tests/test_plan_refsols/part_cross_part_c.txt @@ -1,16 +1,16 @@ ROOT(columns=[('state', sbCustState), 
('max_n', max_n)], orderings=[]) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'max_n': MAX(DEFAULT_TO(n_rows, 0:numeric))}) - JOIN(condition=t0.sbCustState == t1.sbCustState & t0.month == t1.month, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.sbCustState == t1.sbCustState & t0.month == t1.month, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) AGGREGATE(keys={'month': DATETIME(sbTxDateTime, 'start of month':string)}, aggregations={}) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) AGGREGATE(keys={'month': month, 'sbCustState': sbCustState}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) - JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'month': 
t0.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) AGGREGATE(keys={'month': DATETIME(sbTxDateTime, 'start of month':string)}, aggregations={}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index 15f3a6a3e..68b341aaa 100644 --- a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt @@ -1,5 +1,5 @@ ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', INTEGER(p_retailprice)), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last], limit=5:numeric) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_retailprice': t0.p_retailprice, 'p_size': t0.p_size}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_retailprice': t0.p_retailprice, 'p_size': t0.p_size}) LIMIT(limit=2:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}, orderings=[(INTEGER(p_retailprice)):asc_first]) 
SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 99a8d19a6..be38b3dc5 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,16 +1,16 @@ ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/percentile_customers_per_region.txt b/tests/test_plan_refsols/percentile_customers_per_region.txt index 7e1c81b55..0000648c9 100644 --- a/tests/test_plan_refsols/percentile_customers_per_region.txt +++ b/tests/test_plan_refsols/percentile_customers_per_region.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name)], orderings=[(c_name):asc_first]) FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(c_acctbal):asc_last]) == 95:numeric & 
ENDSWITH(c_phone, '00':string), columns={'c_name': c_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/quantile_function_test_1.txt b/tests/test_plan_refsols/quantile_function_test_1.txt index 51b0be264..c460cafe5 100644 --- a/tests/test_plan_refsols/quantile_function_test_1.txt +++ b/tests/test_plan_refsols/quantile_function_test_1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('seventieth_order_price', seventieth_order_price)], orderings=[]) AGGREGATE(keys={}, aggregations={'seventieth_order_price': QUANTILE(o_totalprice, 0.7:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) FILTER(condition=YEAR(o_orderdate) == 
1998:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/quantile_function_test_2.txt b/tests/test_plan_refsols/quantile_function_test_2.txt index 811c4b573..163793d83 100644 --- a/tests/test_plan_refsols/quantile_function_test_2.txt +++ b/tests/test_plan_refsols/quantile_function_test_2.txt @@ -1,11 +1,11 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, 
orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/quantile_function_test_3.txt b/tests/test_plan_refsols/quantile_function_test_3.txt index 811c4b573..163793d83 100644 --- a/tests/test_plan_refsols/quantile_function_test_3.txt +++ b/tests/test_plan_refsols/quantile_function_test_3.txt @@ -1,11 +1,11 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], 
orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_nationkey': 
t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/quantile_function_test_4.txt b/tests/test_plan_refsols/quantile_function_test_4.txt index 692ccd801..b49c995d4 100644 --- a/tests/test_plan_refsols/quantile_function_test_4.txt +++ b/tests/test_plan_refsols/quantile_function_test_4.txt @@ -1,11 +1,11 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': 
t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/rank_customers_per_nation.txt b/tests/test_plan_refsols/rank_customers_per_nation.txt index 76432f5e6..7384d661e 100644 --- a/tests/test_plan_refsols/rank_customers_per_nation.txt +++ 
b/tests/test_plan_refsols/rank_customers_per_nation.txt @@ -1,4 +1,4 @@ ROOT(columns=[('nation_name', n_name), ('name', c_name), ('cust_rank', RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first], allow_ties=True))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/rank_customers_per_region.txt b/tests/test_plan_refsols/rank_customers_per_region.txt index ab8ee8d7d..272d3f0a6 100644 --- a/tests/test_plan_refsols/rank_customers_per_region.txt +++ b/tests/test_plan_refsols/rank_customers_per_region.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_name), ('name', c_name), ('cust_rank', RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first], allow_ties=True, dense=True))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey}) + 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/rank_nations_by_region.txt b/tests/test_plan_refsols/rank_nations_by_region.txt index 68631cbf6..7dbc6d2bf 100644 --- a/tests/test_plan_refsols/rank_nations_by_region.txt +++ b/tests/test_plan_refsols/rank_nations_by_region.txt @@ -1,4 +1,4 @@ ROOT(columns=[('name', n_name), ('rank', RANKING(args=[], partition=[], order=[(r_name):asc_last], allow_ties=True))], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 1e519b6c2..60f1383e8 100644 --- a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first]))], orderings=[(RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])):asc_first], limit=5:numeric) - JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt index 7fd4a68da..0cbd4c887 100644 --- a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt +++ b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt @@ -1,8 +1,8 @@ ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True))], orderings=[(p_partkey):asc_first], limit=15:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 
'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/rank_with_filters_c.txt b/tests/test_plan_refsols/rank_with_filters_c.txt index 52ebabdc7..f985be11c 100644 --- a/tests/test_plan_refsols/rank_with_filters_c.txt +++ b/tests/test_plan_refsols/rank_with_filters_c.txt @@ -1,6 +1,6 @@ ROOT(columns=[('pname', p_name), ('psize', size_3)], orderings=[]) FILTER(condition=RANKING(args=[], partition=[p_size], 
order=[(p_retailprice):desc_first]) == 1:numeric, columns={'p_name': p_name, 'size_3': size_3}) - JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_size': t0.p_size, 'size_3': t1.p_size}) + JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_size': t0.p_size, 'size_3': t1.p_size}) LIMIT(limit=5:numeric, columns={'p_size': p_size}, orderings=[(p_size):desc_last]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) SCAN(table=tpch.PART, columns={'p_size': p_size}) diff --git a/tests/test_plan_refsols/region_acctbal_breakdown.txt b/tests/test_plan_refsols/region_acctbal_breakdown.txt index d58e9d39e..88446e286 100644 --- a/tests/test_plan_refsols/region_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/region_acctbal_breakdown.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': t0.r_name}) 
SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'median_black_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'n_black_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'n_red_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric))}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/region_nation_window_aggs.txt b/tests/test_plan_refsols/region_nation_window_aggs.txt index a748f74d8..0f1fb8476 100644 --- a/tests/test_plan_refsols/region_nation_window_aggs.txt +++ b/tests/test_plan_refsols/region_nation_window_aggs.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('key_sum', RELSUM(args=[n_nationkey], partition=[n_regionkey], order=[])), ('key_avg', RELAVG(args=[n_nationkey], partition=[n_regionkey], order=[])), ('n_short_comment', RELCOUNT(args=[KEEP_IF(n_comment, LENGTH(n_comment) < 75:numeric)], partition=[n_regionkey], order=[])), ('n_nations', RELSIZE(args=[], partition=[n_regionkey], order=[]))], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/region_nations_backref.txt b/tests/test_plan_refsols/region_nations_backref.txt index f24e02fc4..4b66a2f19 100644 --- a/tests/test_plan_refsols/region_nations_backref.txt +++ b/tests/test_plan_refsols/region_nations_backref.txt @@ -1,4 +1,4 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/region_orders_from_nations_richest.txt b/tests/test_plan_refsols/region_orders_from_nations_richest.txt index 19af8caf1..3a4c4a90a 100644 --- a/tests/test_plan_refsols/region_orders_from_nations_richest.txt +++ b/tests/test_plan_refsols/region_orders_from_nations_richest.txt @@ -1,10 +1,10 @@ ROOT(columns=[('region_name', r_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, 
cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey}) FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/regional_first_order_best_line_part.txt b/tests/test_plan_refsols/regional_first_order_best_line_part.txt index 734a95f90..3f1e334ca 100644 --- a/tests/test_plan_refsols/regional_first_order_best_line_part.txt +++ 
b/tests/test_plan_refsols/regional_first_order_best_line_part.txt @@ -1,12 +1,12 @@ ROOT(columns=[('region_name', r_name), ('part_name', p_name)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(l_quantity):desc_first, (l_linenumber):asc_last], allow_ties=False) == 1:numeric, columns={'l_partkey': l_partkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'n_regionkey': t0.n_regionkey}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/regional_suppliers_percentile.txt b/tests/test_plan_refsols/regional_suppliers_percentile.txt index f3c756a0d..d968b9e59 100644 --- a/tests/test_plan_refsols/regional_suppliers_percentile.txt +++ b/tests/test_plan_refsols/regional_suppliers_percentile.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', s_name)], orderings=[]) FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(n_rows):asc_last, (s_name):asc_last], n_buckets=1000) == 1000:numeric, columns={'s_name': s_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_name': t1.s_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_name': t1.s_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/regions_sum_line_price.txt b/tests/test_plan_refsols/regions_sum_line_price.txt index 7b66ff2ea..a8f014f55 100644 --- a/tests/test_plan_refsols/regions_sum_line_price.txt +++ b/tests/test_plan_refsols/regions_sum_line_price.txt @@ -1,10 +1,10 @@ ROOT(columns=[('okey', r_regionkey), ('lsum', DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'r_regionkey': t0.r_regionkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'r_regionkey': t0.r_regionkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_regionkey': t0.n_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/replace_order_by.txt b/tests/test_plan_refsols/replace_order_by.txt index f92cd6f15..c2fa8442c 100644 --- a/tests/test_plan_refsols/replace_order_by.txt +++ b/tests/test_plan_refsols/replace_order_by.txt @@ -1,4 +1,4 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(r_name):desc_last]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': 
n_regionkey}) diff --git a/tests/test_plan_refsols/richest_customer_per_region.txt b/tests/test_plan_refsols/richest_customer_per_region.txt index c6ecbf9f5..426377814 100644 --- a/tests/test_plan_refsols/richest_customer_per_region.txt +++ b/tests/test_plan_refsols/richest_customer_per_region.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('customer_name', c_name), ('balance', c_acctbal)], orderings=[]) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'n_name': n_name, 'r_name': r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt b/tests/test_plan_refsols/semi_aggregate.txt index 
8b83d01e6..9d44d3f56 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/semi_singular.txt b/tests/test_plan_refsols/semi_singular.txt index dbf9d3306..675513cc3 100644 --- a/tests/test_plan_refsols/semi_singular.txt +++ b/tests/test_plan_refsols/semi_singular.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), 
('region_name', r_name)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/simple_anti_1.txt b/tests/test_plan_refsols/simple_anti_1.txt index 7537fb24b..1b99d8c05 100644 --- a/tests/test_plan_refsols/simple_anti_1.txt +++ b/tests/test_plan_refsols/simple_anti_1.txt @@ -1,4 +1,4 @@ ROOT(columns=[('name', c_name)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/simple_anti_2.txt b/tests/test_plan_refsols/simple_anti_2.txt index 94a3573e4..322e6b23c 100644 --- a/tests/test_plan_refsols/simple_anti_2.txt +++ b/tests/test_plan_refsols/simple_anti_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == 
t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size < 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_cross_1.txt b/tests/test_plan_refsols/simple_cross_1.txt index 69e8090c6..77eee16bc 100644 --- a/tests/test_plan_refsols/simple_cross_1.txt +++ b/tests/test_plan_refsols/simple_cross_1.txt @@ -1,4 +1,4 @@ ROOT(columns=[('r1', r_name), ('r2', r2)], orderings=[(r_name):asc_first, (r2):asc_first]) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'r2': t1.r_name, 'r_name': t0.r_name}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'r2': t1.r_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_10.txt b/tests/test_plan_refsols/simple_cross_10.txt index 275add951..4113ecab2 100644 --- a/tests/test_plan_refsols/simple_cross_10.txt +++ b/tests/test_plan_refsols/simple_cross_10.txt @@ -1,9 +1,9 @@ ROOT(columns=[('region_name', r_name), ('n_other_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) 
AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'r_regionkey': t0.r_regionkey}) - JOIN(condition=t1.r_name != t0.r_name, type=INNER, cardinality=PLURAL_FILTER, columns={'key_2': t1.r_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_regionkey': t0.r_regionkey}) + JOIN(condition=t1.r_name != t0.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t1.r_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_11.txt b/tests/test_plan_refsols/simple_cross_11.txt index aea66e352..27cff2d37 100644 --- a/tests/test_plan_refsols/simple_cross_11.txt +++ b/tests/test_plan_refsols/simple_cross_11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_orderdate == t1.min_date, type=INNER, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.o_orderdate == t1.min_date, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={}, aggregations={'min_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git 
a/tests/test_plan_refsols/simple_cross_12.txt b/tests/test_plan_refsols/simple_cross_12.txt index 247a0b247..e1f60b9e3 100644 --- a/tests/test_plan_refsols/simple_cross_12.txt +++ b/tests/test_plan_refsols/simple_cross_12.txt @@ -1,5 +1,5 @@ ROOT(columns=[('order_priority', o_orderpriority), ('market_segment', c_mktsegment)], orderings=[(o_orderpriority):asc_first, (c_mktsegment):asc_first]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_mktsegment': t1.c_mktsegment, 'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_mktsegment': t1.c_mktsegment, 'o_orderpriority': t0.o_orderpriority}) AGGREGATE(keys={'o_orderpriority': o_orderpriority}, aggregations={}) SCAN(table=tpch.ORDERS, columns={'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'c_mktsegment': c_mktsegment}, aggregations={}) diff --git a/tests/test_plan_refsols/simple_cross_2.txt b/tests/test_plan_refsols/simple_cross_2.txt index 714f36eb7..a5f30f5fa 100644 --- a/tests/test_plan_refsols/simple_cross_2.txt +++ b/tests/test_plan_refsols/simple_cross_2.txt @@ -1,4 +1,4 @@ ROOT(columns=[('r1', r_name), ('r2', r2)], orderings=[(r_name):asc_first, (r2):asc_first]) - JOIN(condition=t0.r_name != t1.r_name, type=INNER, cardinality=PLURAL_FILTER, columns={'r2': t1.r_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_name != t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r2': t1.r_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_3.txt b/tests/test_plan_refsols/simple_cross_3.txt index 2876b8657..a694edb4c 100644 --- a/tests/test_plan_refsols/simple_cross_3.txt +++ b/tests/test_plan_refsols/simple_cross_3.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_nation', anything_supplier_nation), 
('customer_nation', anything_n_name), ('nation_combinations', n_rows)], orderings=[]) AGGREGATE(keys={'key_5': key_5, 'key_8': key_8, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_supplier_nation': ANYTHING(supplier_nation), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) - JOIN(condition=t0.key_8 == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) - JOIN(condition=t0.key_5 == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'key_5': t0.key_5, 'key_8': t1.n_nationkey, 'n_name': t1.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.n_name}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'key_5': t1.r_regionkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) + JOIN(condition=t0.key_8 == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) + JOIN(condition=t0.key_5 == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'key_5': t0.key_5, 'key_8': t1.n_nationkey, 'n_name': t1.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.n_name}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'key_5': t1.r_regionkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_regionkey': t0.r_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_4.txt b/tests/test_plan_refsols/simple_cross_4.txt index ed891e481..0dd20b88a 100644 --- a/tests/test_plan_refsols/simple_cross_4.txt +++ b/tests/test_plan_refsols/simple_cross_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('n_other_regions', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.r_name != t0.r_name & SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, columns={'r_regionkey': t0.r_regionkey}) + JOIN(condition=t1.r_name != t0.r_name & SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_5.txt b/tests/test_plan_refsols/simple_cross_5.txt 
index 20572dfc8..76c51fc6b 100644 --- a/tests/test_plan_refsols/simple_cross_5.txt +++ b/tests/test_plan_refsols/simple_cross_5.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), ('best_order_priority_qty', total_qty)], orderings=[(p_size):asc_first]) - JOIN(condition=t0.p_size == t1.p_size, type=LEFT, cardinality=SINGULAR_FILTER, columns={'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size, 'total_qty': t1.total_qty}) + JOIN(condition=t0.p_size == t1.p_size, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size, 'total_qty': t1.total_qty}) LIMIT(limit=10:numeric, columns={'p_size': p_size}, orderings=[(p_size):asc_first]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_size': p_size}) @@ -7,9 +7,9 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), ( PROJECT(columns={'o_orderpriority': o_orderpriority, 'p_size': p_size, 'total_qty': DEFAULT_TO(sum_l_quantity, 0:numeric)}) FILTER(condition=RANKING(args=[], partition=[p_size], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'o_orderpriority': o_orderpriority, 'p_size': p_size, 'sum_l_quantity': sum_l_quantity}) AGGREGATE(keys={'o_orderpriority': o_orderpriority, 'p_size': p_size}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_partkey == t1.p_partkey & t1.p_size == t0.p_size, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': t0.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, 
columns={'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size}) + JOIN(condition=t0.l_partkey == t1.p_partkey & t1.p_size == t0.p_size, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_quantity': t0.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size}) LIMIT(limit=10:numeric, columns={'p_size': p_size}, orderings=[(p_size):asc_first]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_cross_6.txt b/tests/test_plan_refsols/simple_cross_6.txt index bfd9474c3..20c725da5 100644 --- a/tests/test_plan_refsols/simple_cross_6.txt +++ b/tests/test_plan_refsols/simple_cross_6.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_pairs', n_pairs)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_pairs': COUNT()}) - JOIN(condition=t1.o_custkey == t0.o_custkey & t1.o_orderdate == t0.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t1.o_custkey == t0.o_custkey & t1.o_orderdate == t0.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) FILTER(condition=INTEGER(SLICE(o_clerk, 6:numeric, None:unknown, None:unknown)) >= 900:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': 
o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=INTEGER(SLICE(o_clerk, 6:numeric, None:unknown, None:unknown)) >= 900:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/simple_cross_7.txt b/tests/test_plan_refsols/simple_cross_7.txt index 370589ba7..78f1f71f4 100644 --- a/tests/test_plan_refsols/simple_cross_7.txt +++ b/tests/test_plan_refsols/simple_cross_7.txt @@ -1,9 +1,9 @@ ROOT(columns=[('original_order_key', o_orderkey), ('n_other_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (o_orderkey):asc_first], limit=5:numeric) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_orderkey': t0.o_orderkey}) FILTER(condition=o_orderstatus == 'P':string, columns={'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey}) FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) 
FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/simple_cross_8.txt b/tests/test_plan_refsols/simple_cross_8.txt index aee7a052f..b40fe6b0e 100644 --- a/tests/test_plan_refsols/simple_cross_8.txt +++ b/tests/test_plan_refsols/simple_cross_8.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_region', anything_supplier_region), ('customer_region', customer_region), ('region_combinations', region_combinations)], orderings=[]) AGGREGATE(keys={'key_2': key_2, 'r_regionkey': r_regionkey}, aggregations={'anything_supplier_region': ANYTHING(supplier_region), 'customer_region': ANYTHING(r_name), 'region_combinations': COUNT()}) FILTER(condition=name_18 == supplier_region, columns={'key_2': key_2, 'r_name': r_name, 'r_regionkey': r_regionkey, 'supplier_region': supplier_region}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'name_18': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_2': t0.key_2, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_2': t0.key_2, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_2': t0.key_2, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 
'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'key_2': t1.r_regionkey, 'r_name': t1.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.r_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'key_2': t0.key_2, 'name_18': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t0.key_2, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t1.r_regionkey, 'r_name': t1.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) 
SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -16,8 +16,8 @@ ROOT(columns=[('supplier_region', anything_supplier_region), ('customer_region', SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=MONTH(l_shipdate) == 3:numeric & YEAR(l_shipdate) == 1998:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) FILTER(condition=s_acctbal < 0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_9.txt b/tests/test_plan_refsols/simple_cross_9.txt index a1e3233b6..7a1abb072 100644 --- a/tests/test_plan_refsols/simple_cross_9.txt +++ b/tests/test_plan_refsols/simple_cross_9.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n1', n_name), ('n2', n2)], orderings=[(n_name):asc_first, (n2):asc_first], limit=10:numeric) - JOIN(condition=t0.n_name != t1.n_name & t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'n2': t1.n_name, 'n_name': t0.n_name}) - JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_name != t1.n_name & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n2': t1.n_name, 'n_name': t0.n_name}) + JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/simple_semi_1.txt b/tests/test_plan_refsols/simple_semi_1.txt index a7dcb04b6..98a13ec16 100644 --- a/tests/test_plan_refsols/simple_semi_1.txt +++ b/tests/test_plan_refsols/simple_semi_1.txt @@ -1,4 +1,4 @@ ROOT(columns=[('name', c_name)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/simple_semi_2.txt b/tests/test_plan_refsols/simple_semi_2.txt index c308de1b0..146ebf45e 100644 --- 
a/tests/test_plan_refsols/simple_semi_2.txt +++ b/tests/test_plan_refsols/simple_semi_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size < 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_var_std.txt b/tests/test_plan_refsols/simple_var_std.txt index af3b1bbe0..505bad30b 100644 --- a/tests/test_plan_refsols/simple_var_std.txt +++ b/tests/test_plan_refsols/simple_var_std.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('var', pop_var), ('std', pop_std), ('sample_var', sample_var), ('sample_std', sample_std), ('pop_var', pop_var), ('pop_std', pop_std)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'pop_std': t1.pop_std, 'pop_var': t1.pop_var, 'sample_std': t1.sample_std, 'sample_var': t1.sample_var}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'pop_std': t1.pop_std, 'pop_var': t1.pop_var, 'sample_std': t1.sample_std, 'sample_var': t1.sample_var}) FILTER(condition=ISIN(n_name, ['ALGERIA', 
'ARGENTINA']:array[unknown]), columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'pop_std': POPULATION_STD(s_acctbal), 'pop_var': POPULATION_VARIANCE(s_acctbal), 'sample_std': SAMPLE_STD(s_acctbal), 'sample_var': SAMPLE_VARIANCE(s_acctbal)}) diff --git a/tests/test_plan_refsols/singular1.txt b/tests/test_plan_refsols/singular1.txt index db0c193a7..665f6f40c 100644 --- a/tests/test_plan_refsols/singular1.txt +++ b/tests/test_plan_refsols/singular1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('nation_4_name', n_name)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=n_nationkey == 4:numeric, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/singular2.txt b/tests/test_plan_refsols/singular2.txt index dcdbced2c..294c7666f 100644 --- a/tests/test_plan_refsols/singular2.txt +++ b/tests/test_plan_refsols/singular2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', n_name), ('okey', o_orderkey)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': 
n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) FILTER(condition=c_custkey == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=o_orderkey == 454791:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/singular3.txt b/tests/test_plan_refsols/singular3.txt index fca6acd8d..e3f6787ca 100644 --- a/tests/test_plan_refsols/singular3.txt +++ b/tests/test_plan_refsols/singular3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) LIMIT(limit=5:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}, orderings=[(c_name):asc_first]) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index bd2891393..f8ce4600b 100644 --- a/tests/test_plan_refsols/singular4.txt +++ b/tests/test_plan_refsols/singular4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/singular5.txt b/tests/test_plan_refsols/singular5.txt index bf6465b3d..44b340fcb 100644 --- a/tests/test_plan_refsols/singular5.txt +++ b/tests/test_plan_refsols/singular5.txt @@ -1,12 +1,12 @@ ROOT(columns=[('container', p_container), ('highest_price_ship_date', max_anything_l_shipdate)], orderings=[(max_anything_l_shipdate):asc_first, (p_container):asc_first], limit=5:numeric) FILTER(condition=sum_n_rows > 0:numeric, columns={'max_anything_l_shipdate': max_anything_l_shipdate, 'p_container': p_container}) AGGREGATE(keys={'p_container': p_container}, aggregations={'max_anything_l_shipdate': MAX(anything_l_shipdate), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.p_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'anything_l_shipdate': t1.anything_l_shipdate, 'n_rows': t1.n_rows, 'p_container': t0.p_container}) + JOIN(condition=t0.p_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_l_shipdate': t1.anything_l_shipdate, 'n_rows': t1.n_rows, 'p_container': t0.p_container}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': 
p_partkey}) AGGREGATE(keys={'p_partkey': p_partkey}, aggregations={'anything_l_shipdate': ANYTHING(l_shipdate), 'n_rows': COUNT()}) FILTER(condition=RANKING(args=[], partition=[p_container], order=[(l_extendedprice):desc_first, (l_shipdate):asc_last]) == 1:numeric, columns={'l_shipdate': l_shipdate, 'p_partkey': p_partkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_shipdate': t1.l_shipdate, 'p_container': t0.p_container, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_shipdate': t1.l_shipdate, 'p_container': t0.p_container, 'p_partkey': t0.p_partkey}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/singular6.txt b/tests/test_plan_refsols/singular6.txt index c118e82c9..a547c658e 100644 --- a/tests/test_plan_refsols/singular6.txt +++ b/tests/test_plan_refsols/singular6.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', c_name), ('receipt_date', l_receiptdate), ('nation_name', n_name)], orderings=[(l_receiptdate):asc_first, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) FILTER(condition=c_nationkey == 
4:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'o_custkey': t0.o_custkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_receiptdate': t0.l_receiptdate, 'o_custkey': t0.o_custkey, 's_nationkey': t1.s_nationkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_receiptdate):asc_last, (l_extendedprice * 1:numeric - l_discount):desc_first]) == 1:numeric, columns={'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_receiptdate': t1.l_receiptdate, 'l_suppkey': t1.l_suppkey, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_receiptdate': t1.l_receiptdate, 'l_suppkey': t1.l_suppkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_clerk == 'Clerk#000000017':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': 
o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 105329e60..9bf93569a 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,11 +1,11 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(n_rows, 0:numeric)):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/singular_anti.txt b/tests/test_plan_refsols/singular_anti.txt index de7e0a463..7de3c3eec 100644 --- a/tests/test_plan_refsols/singular_anti.txt +++ b/tests/test_plan_refsols/singular_anti.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('region_name', None:unknown)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/singular_semi.txt b/tests/test_plan_refsols/singular_semi.txt index 5a46a6430..7c881d169 100644 --- a/tests/test_plan_refsols/singular_semi.txt +++ b/tests/test_plan_refsols/singular_semi.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('region_name', r_name)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/smoke_d.txt b/tests/test_plan_refsols/smoke_d.txt index 1edf329d2..6423c59ff 100644 --- a/tests/test_plan_refsols/smoke_d.txt +++ b/tests/test_plan_refsols/smoke_d.txt @@ -1,6 +1,6 @@ ROOT(columns=[('key', c_custkey), ('a', RANKING(args=[], partition=[], order=[(c_acctbal):asc_last, (c_custkey):asc_last])), ('b', RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last])), ('c', RANKING(args=[], partition=[], order=[(c_mktsegment):asc_last], allow_ties=True)), ('d', RANKING(args=[], partition=[], order=[(c_mktsegment):asc_last], allow_ties=True, dense=True)), ('e', PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last, (c_custkey):asc_last])), ('f', PERCENTILE(args=[], partition=[c_nationkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last], n_buckets=12)), ('g', PREV(args=[c_custkey], partition=[], order=[(c_custkey):asc_last])), ('h', PREV(args=[c_custkey], partition=[c_nationkey], order=[(c_custkey):asc_last], n=2, default=-1)), ('i', NEXT(args=[c_custkey], partition=[], order=[(c_custkey):asc_last])), ('j', NEXT(args=[c_custkey], partition=[c_nationkey], order=[(c_custkey):asc_last], n=6000)), ('k', RELSUM(args=[c_acctbal], partition=[c_nationkey], order=[])), ('l', RELSUM(args=[c_acctbal], partition=[], order=[(c_custkey):asc_last], cumulative=True)), ('m', ROUND(RELAVG(args=[c_acctbal], partition=[], order=[]), 2:numeric)), ('n', ROUND(RELAVG(args=[c_acctbal], partition=[c_nationkey], order=[(c_custkey):asc_last], frame=(None, -1)), 2:numeric)), ('o', RELCOUNT(args=[KEEP_IF(c_acctbal, c_acctbal > 0:numeric)], 
partition=[], order=[])), ('p', RELSIZE(args=[], partition=[], order=[]))], orderings=[(c_custkey):asc_first], limit=10:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_mktsegment': t1.c_mktsegment, 'c_nationkey': t1.c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_mktsegment': t1.c_mktsegment, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_combine_strings.txt b/tests/test_plan_refsols/sqlite_udf_combine_strings.txt index b88dc1c8a..5a772b6e9 100644 --- a/tests/test_plan_refsols/sqlite_udf_combine_strings.txt +++ b/tests/test_plan_refsols/sqlite_udf_combine_strings.txt @@ -1,6 +1,6 @@ ROOT(columns=[('s1', s1), ('s2', s2), ('s3', s3), ('s4', s4)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'s1': t0.s1, 's2': t0.s2, 's3': t0.s3, 's4': t1.s4}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s1': t0.s1, 's2': t0.s2, 's3': t1.s3}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s1': t0.s1, 's2': t0.s2, 's3': t0.s3, 's4': t1.s4}) + JOIN(condition=True:bool, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'s1': t0.s1, 's2': t0.s2, 's3': t1.s3}) AGGREGATE(keys={}, aggregations={'s1': COMBINE_STRINGS(r_name), 's2': COMBINE_STRINGS(KEEP_IF(r_name, r_name != 'EUROPE':string), ', ':string)}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) AGGREGATE(keys={}, aggregations={'s3': COMBINE_STRINGS(SLICE(n_name, None:unknown, 1:numeric, None:unknown), '':string)}) diff --git a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt index 380033384..2eff260ee 100644 --- a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_cust', n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=EPSILON(c_acctbal, avg_balance, avg_balance * 0.1:numeric), columns={'n_regionkey': n_regionkey}) PROJECT(columns={'avg_balance': RELAVG(args=[c_acctbal], partition=[n_regionkey], order=[]), 'c_acctbal': c_acctbal, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt index 978f366b2..3adfbe06d 100644 --- a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt +++ b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt @@ -1,9 +1,9 @@ ROOT(columns=[('region_name', r_name), ('cvp_ab_otp', ROUND(agg_0, 3:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, o_totalprice / 1000000.0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': 
c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_nested.txt b/tests/test_plan_refsols/sqlite_udf_nested.txt index 23a6c9aeb..ca252a32f 100644 --- a/tests/test_plan_refsols/sqlite_udf_nested.txt +++ b/tests/test_plan_refsols/sqlite_udf_nested.txt @@ -2,7 +2,7 @@ ROOT(columns=[('p', ROUND(percentage_expr_1, 2:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'percentage_expr_1': PERCENTAGE(DECODE3(c_mktsegment, 'BUILDING':string, POSITIVE(c_acctbal), 'MACHINERY':string, EPSILON(c_acctbal, min_bal, 500:numeric), 'HOUSEHOLD':string, INTEGER(FORMAT_DATETIME('%j':string, min_o_orderdate)) == '366':string, False:bool))}) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': min_bal, 'min_o_orderdate': min_o_orderdate}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': RELMIN(args=[c_acctbal], partition=[], order=[]), 'min_o_orderdate': min_o_orderdate, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'min_o_orderdate': MIN(o_orderdate), 'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git 
a/tests/test_plan_refsols/sqlite_udf_nval.txt b/tests/test_plan_refsols/sqlite_udf_nval.txt index 6435249df..67f0c0f2f 100644 --- a/tests/test_plan_refsols/sqlite_udf_nval.txt +++ b/tests/test_plan_refsols/sqlite_udf_nval.txt @@ -1,4 +1,4 @@ ROOT(columns=[('rname', r_name), ('nname', n_name), ('v1', NVAL(args=[n_name, 3:numeric], partition=[], order=[(n_name):asc_last])), ('v2', NVAL(args=[n_name, 1:numeric], partition=[n_regionkey], order=[(n_name):asc_last])), ('v3', NVAL(args=[n_name, 2:numeric], partition=[n_regionkey], order=[(n_name):asc_last], frame=(1, None))), ('v4', NVAL(args=[n_name, 5:numeric], partition=[], order=[(n_name):asc_last], cumulative=True))], orderings=[(r_name):asc_first, (n_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt index c0b287617..b2444dc7c 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt @@ -1,12 +1,12 @@ ROOT(columns=[('name', r_name), ('pct_cust_positive', ROUND(percentage_expr_2, 2:numeric)), ('pct_supp_positive', ROUND(percentage_expr_3, 2:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey 
== t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(POSITIVE(c_acctbal))}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_3': PERCENTAGE(POSITIVE(s_acctbal))}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_acctbal': t1.s_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': 
s_nationkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index e759b898f..b9e46e082 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('n_shipments', n_rows)], orderings=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (s_name):asc_first], limit=3:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 
'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows_1': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index 6ec721f0f..58a14443e 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,14 +1,14 @@ ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]))], orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[])):desc_last], limit=5:numeric) - JOIN(condition=t0.s_suppkey 
== t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=s_acctbal >= 0.0:numeric & CONTAINS(s_comment, 'careful':string), columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': 
t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) FILTER(condition=YEAR(l_shipdate) == 1995:numeric & l_shipmode == 'SHIP':string, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) FILTER(condition=CONTAINS(p_name, 'tomato':string) & STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/suppliers_bal_diffs.txt b/tests/test_plan_refsols/suppliers_bal_diffs.txt index 998e0dde9..5257873f3 100644 --- a/tests/test_plan_refsols/suppliers_bal_diffs.txt +++ b/tests/test_plan_refsols/suppliers_bal_diffs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', s_name), ('region_name', r_name), ('acctbal_delta', s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]))], orderings=[(s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last])):desc_last], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index 2e3d7779c..00dcdb1fe 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,13 +1,13 @@ ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_device_id': t0.in_device_id}) + JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'in_device_id': t0.in_device_id}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) FILTER(condition=er_name == 'Battery Failure':string, columns={'er_id': er_id}) SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) diff --git a/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt b/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt index 58825c242..0a986eb55 100644 --- a/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt +++ b/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', co_name), ('n_other_countries', n_other_countries)], orderings=[(co_name):asc_first]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_other_countries': t1.n_other_countries}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_other_countries': t1.n_other_countries}) SCAN(table=main.COUNTRIES, 
columns={'co_name': co_name}) AGGREGATE(keys={}, aggregations={'n_other_countries': COUNT()}) SCAN(table=main.COUNTRIES, columns={}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index 9db93bb71..f81eb8d36 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,12 +1,12 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) - JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_FILTER, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) + JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': 
t0.co_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id, 'de_purchase_country_id': de_purchase_country_id}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index 398921e1f..1cf03558d 100644 --- a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -1,21 +1,21 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_14': t0.agg_14, 'agg_3': 
t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, 
columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) AGGREGATE(keys={'de_purchase_country_id': de_purchase_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) AGGREGATE(keys={'us_country_id': us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) - JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) + JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': 
COUNT()}) diff --git a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt index f8470f2bb..b66d05d00 100644 --- a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt +++ b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt @@ -1,10 +1,10 @@ ROOT(columns=[('error', er_name), ('pct', ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[(ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric)):desc_last]) - JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) AGGREGATE(keys={'in_error_id': in_error_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_error_id': t0.in_error_id}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'in_error_id': t0.in_error_id}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': 
pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index b1510489f..b2c4d2f9a 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,10 +1,10 @@ ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_rows': n_rows, 'sum_n_incidents': DEFAULT_TO(sum_n_incidents, 0:numeric)}) AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + 
JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git a/tests/test_plan_refsols/technograph_global_incident_rate.txt b/tests/test_plan_refsols/technograph_global_incident_rate.txt index a6d1dd70c..5d6365fb4 100644 --- a/tests/test_plan_refsols/technograph_global_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_global_incident_rate.txt @@ -1,5 +1,5 @@ ROOT(columns=[('ir', ROUND(n_rows / agg_1, 2:numeric))], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/technograph_hot_purchase_window.txt b/tests/test_plan_refsols/technograph_hot_purchase_window.txt index 45b2922d1..f325373f3 100644 --- a/tests/test_plan_refsols/technograph_hot_purchase_window.txt +++ b/tests/test_plan_refsols/technograph_hot_purchase_window.txt @@ -1,7 +1,7 @@ ROOT(columns=[('start_of_period', ca_dt), ('n_purchases', n_purchases)], orderings=[(n_purchases):desc_last, (ca_dt):asc_first], limit=1:numeric) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_purchases': COUNT()}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - 
JOIN(condition=t1.ca_dt < DATETIME(t0.ca_dt, '+5 days':string) & t1.ca_dt >= t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t1.ca_dt < DATETIME(t0.ca_dt, '+5 days':string) & t1.ca_dt >= t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) FILTER(condition=YEAR(ca_dt) == 2024:numeric, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index 052d1893e..582f162a3 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -1,13 +1,13 @@ ROOT(columns=[('year', release_year), ('ir', ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric))], orderings=[(release_year):asc_first]) - JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'pr_release': t0.pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index 8cd9a28ea..7af10fc70 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,7 +1,7 @@ ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'pr_brand': 
t0.pr_brand}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 701f153d4..80a9a18fe 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,13 +1,13 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) AGGREGATE(keys={'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) @@ -15,12 +15,12 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0 FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index 2bd09c500..7abb7f5b8 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,8 +1,8 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 
2:numeric)):desc_last], limit=5:numeric) - JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) + JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index d7cadbc00..601a5a746 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -1,24 +1,24 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], 
partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_expr_4, 0:numeric) - PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', DEFAULT_TO(sum_n_rows, 0:numeric)), ('incidents', DEFAULT_TO(sum_expr_4, 0:numeric))], orderings=[(year - YEAR(release_date)):asc_first]) - JOIN(condition=YEAR(t0.release_date) <= t1.year_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year_1}) + JOIN(condition=YEAR(t0.release_date) <= t1.year_1, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year_1}) AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) AGGREGATE(keys={'year_1': YEAR(ca_dt)}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 
'expr_4': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of 
day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index 2f06f93d8..b1f19bec2 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,14 +1,14 @@ ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * DEFAULT_TO(sum_expr_3, 0:numeric) - PREV(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', DEFAULT_TO(sum_expr_3, 0:numeric)), ('incidents', 
DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) FILTER(condition=DEFAULT_TO(sum_expr_3, 0:numeric) > 0:numeric, columns={'sum_expr_3': sum_expr_3, 'sum_n_rows': sum_n_rows, 'year': year}) AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) 
SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt index b24f78d73..716f03473 100644 --- a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt +++ b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('total_bal', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(ordering_0):asc_last], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt index 7d8c385c4..5983915cd 100644 --- a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt +++ b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) + 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/top_customers_by_orders.txt b/tests/test_plan_refsols/top_customers_by_orders.txt index 8d72b9471..f9ae80b9e 100644 --- a/tests/test_plan_refsols/top_customers_by_orders.txt +++ b/tests/test_plan_refsols/top_customers_by_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('customer_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_custkey):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index 71b4e801c..5bd99fb20 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,9 +1,9 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), 
('C_COMMENT', c_comment)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (c_custkey):asc_first], limit=20:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 802eb8aa3..a10aea7bc 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,16 +1,16 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', DEFAULT_TO(sum_expr_2, 0:numeric))], orderings=[(DEFAULT_TO(sum_expr_2, 0:numeric)):desc_last], limit=10:numeric) - JOIN(condition=DEFAULT_TO(t1.sum_expr_2, 0:numeric) > DEFAULT_TO(t0.sum_metric, 0:numeric) * 0.0001:numeric, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2}) + JOIN(condition=DEFAULT_TO(t1.sum_expr_2, 0:numeric) > DEFAULT_TO(t0.sum_metric, 0:numeric) * 0.0001:numeric, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2}) AGGREGATE(keys={}, aggregations={'sum_metric': SUM(ps_supplycost * ps_availqty)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': 
ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(ps_supplycost * ps_availqty)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q12.txt b/tests/test_plan_refsols/tpch_q12.txt index e12415ca9..1d3fec99b 100644 --- a/tests/test_plan_refsols/tpch_q12.txt +++ b/tests/test_plan_refsols/tpch_q12.txt @@ -1,6 
+1,6 @@ ROOT(columns=[('L_SHIPMODE', l_shipmode), ('HIGH_LINE_COUNT', DEFAULT_TO(sum_is_high_priority, 0:numeric)), ('LOW_LINE_COUNT', DEFAULT_TO(sum_expr_2, 0:numeric))], orderings=[(l_shipmode):asc_first]) AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]))), 'sum_is_high_priority': SUM(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]))}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/tpch_q13.txt b/tests/test_plan_refsols/tpch_q13.txt index ecf4d93a1..2646b1149 100644 --- a/tests/test_plan_refsols/tpch_q13.txt +++ b/tests/test_plan_refsols/tpch_q13.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_COUNT', num_non_special_orders), ('CUSTDIST', CUSTDIST)], orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last], limit=10:numeric) AGGREGATE(keys={'num_non_special_orders': DEFAULT_TO(n_rows, 0:numeric)}, aggregations={'CUSTDIST': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q14.txt b/tests/test_plan_refsols/tpch_q14.txt index ea3385773..c4c8c588b 100644 --- a/tests/test_plan_refsols/tpch_q14.txt +++ b/tests/test_plan_refsols/tpch_q14.txt @@ -1,6 +1,6 @@ ROOT(columns=[('PROMO_REVENUE', 100.0:numeric * DEFAULT_TO(sum_promo_value, 0:numeric) / DEFAULT_TO(sum_value, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_promo_value': SUM(IFF(STARTSWITH(p_type, 'PROMO':string), l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_value': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'p_type': t1.p_type}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'p_type': t1.p_type}) FILTER(condition=MONTH(l_shipdate) == 9:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index b42afa376..b7aa4880a 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -1,8 +1,8 @@ ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', 
s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', DEFAULT_TO(sum_expr_3, 0:numeric))], orderings=[(s_suppkey):asc_first]) - JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) + JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'max_revenue': MAX(DEFAULT_TO(sum_expr_2, 0:numeric))}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(l_extendedprice * 1:numeric - l_discount)}) FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) diff --git 
a/tests/test_plan_refsols/tpch_q16.txt b/tests/test_plan_refsols/tpch_q16.txt index 07c0cfd97..8d7d5de72 100644 --- a/tests/test_plan_refsols/tpch_q16.txt +++ b/tests/test_plan_refsols/tpch_q16.txt @@ -1,7 +1,7 @@ ROOT(columns=[('P_BRAND', p_brand), ('P_TYPE', p_type), ('P_SIZE', p_size), ('SUPPLIER_COUNT', SUPPLIER_COUNT)], orderings=[(SUPPLIER_COUNT):desc_last, (p_brand):asc_first, (p_type):asc_first, (p_size):asc_first], limit=10:numeric) AGGREGATE(keys={'p_brand': p_brand, 'p_size': p_size, 'p_type': p_type}, aggregations={'SUPPLIER_COUNT': NDISTINCT(ps_suppkey)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_brand': t1.p_brand, 'p_size': t1.p_size, 'p_type': t1.p_type, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_brand': t1.p_brand, 'p_size': t1.p_size, 'p_type': t1.p_type, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=NOT(LIKE(s_comment, '%Customer%Complaints%':string)), columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_comment': s_comment, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q17.txt b/tests/test_plan_refsols/tpch_q17.txt index 25ea91c57..955986183 100644 --- a/tests/test_plan_refsols/tpch_q17.txt +++ b/tests/test_plan_refsols/tpch_q17.txt @@ -1,7 +1,7 @@ ROOT(columns=[('AVG_YEARLY', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / 7.0:numeric)], orderings=[]) AGGREGATE(keys={}, 
aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) FILTER(condition=l_quantity < 0.2:numeric * RELAVG(args=[l_quantity], partition=[l_partkey], order=[]), columns={'l_extendedprice': l_extendedprice}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity}) FILTER(condition=p_brand == 'Brand#23':string & p_container == 'MED BOX':string, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index 8acc50868..9a3b55e20 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first], limit=10:numeric) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': 
t0.o_totalprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=DEFAULT_TO(sum_l_quantity, 0:numeric) > 300:numeric, columns={'l_orderkey': l_orderkey, 'sum_l_quantity': sum_l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q19.txt b/tests/test_plan_refsols/tpch_q19.txt index 5f7aaab71..c3a94e9b4 100644 --- a/tests/test_plan_refsols/tpch_q19.txt +++ b/tests/test_plan_refsols/tpch_q19.txt @@ -1,6 +1,6 @@ ROOT(columns=[('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_partkey == t1.p_partkey & MONOTONIC(1:numeric, t1.p_size, 5:numeric) & MONOTONIC(1:numeric, t0.l_quantity, 11:numeric) & ISIN(t1.p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & t1.p_brand == 'Brand#12':string | MONOTONIC(1:numeric, t1.p_size, 10:numeric) & MONOTONIC(10:numeric, t0.l_quantity, 20:numeric) & ISIN(t1.p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & t1.p_brand == 'Brand#23':string | MONOTONIC(1:numeric, t1.p_size, 15:numeric) & MONOTONIC(20:numeric, t0.l_quantity, 30:numeric) & ISIN(t1.p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG 
PKG']:array[unknown]) & t1.p_brand == 'Brand#34':string, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey & MONOTONIC(1:numeric, t1.p_size, 5:numeric) & MONOTONIC(1:numeric, t0.l_quantity, 11:numeric) & ISIN(t1.p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & t1.p_brand == 'Brand#12':string | MONOTONIC(1:numeric, t1.p_size, 10:numeric) & MONOTONIC(10:numeric, t0.l_quantity, 20:numeric) & ISIN(t1.p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & t1.p_brand == 'Brand#23':string | MONOTONIC(1:numeric, t1.p_size, 15:numeric) & MONOTONIC(20:numeric, t0.l_quantity, 30:numeric) & ISIN(t1.p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & t1.p_brand == 'Brand#34':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice}) FILTER(condition=l_shipinstruct == 'DELIVER IN PERSON':string & ISIN(l_shipmode, ['AIR', 'AIR REG']:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipinstruct': l_shipinstruct, 'l_shipmode': l_shipmode}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index 135018c23..e097c9898 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -1,13 +1,13 @@ ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), ('P_PARTKEY', p_partkey), ('P_MFGR', p_mfgr), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('S_COMMENT', 
s_comment)], orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first], limit=10:numeric) FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_supplycost):asc_last], allow_ties=True) == 1:numeric, columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 
'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_phone': s_phone, 's_suppkey': s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 240330953..a09dd5caf 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -1,13 +1,13 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name):asc_first], limit=10:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) - JOIN(condition=t0.ps_partkey == t1.p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_partkey': l_partkey}, 
aggregations={'sum_l_quantity': SUM(l_quantity)}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 8a3ad16ad..05eafcbc8 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,21 +1,21 @@ ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (s_name):asc_first], limit=10:numeric) - JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t0.anything_l_suppkey}) + JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_l_suppkey': t0.anything_l_suppkey}) FILTER(condition=anything_o_orderstatus == 
'F':string, columns={'anything_l_suppkey': anything_l_suppkey, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) - JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey, 'o_orderstatus': t1.o_orderstatus}) + JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey, 'o_orderstatus': t1.o_orderstatus}) FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - 
JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey}) FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 64cf83a90..dbca0c73d 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,8 +1,8 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) AGGREGATE(keys={'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - JOIN(condition=t0.c_custkey 
== t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) - JOIN(condition=t1.c_acctbal > t0.global_avg_balance, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) + JOIN(condition=t1.c_acctbal > t0.global_avg_balance, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/tpch_q3.txt b/tests/test_plan_refsols/tpch_q3.txt index fa8154ac2..0a7c9f97d 100644 --- a/tests/test_plan_refsols/tpch_q3.txt +++ b/tests/test_plan_refsols/tpch_q3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('O_ORDERDATE', o_orderdate), ('O_SHIPPRIORITY', o_shippriority)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first], limit=10:numeric) AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 
'o_shippriority': t0.o_shippriority}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/tpch_q4.txt b/tests/test_plan_refsols/tpch_q4.txt index 633e0d21e..c2fe0f9a6 100644 --- a/tests/test_plan_refsols/tpch_q4.txt +++ b/tests/test_plan_refsols/tpch_q4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('O_ORDERPRIORITY', o_orderpriority), ('ORDER_COUNT', ORDER_COUNT)], orderings=[(o_orderpriority):asc_first]) AGGREGATE(keys={'o_orderpriority': o_orderpriority}, aggregations={'ORDER_COUNT': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderpriority': t0.o_orderpriority}) FILTER(condition=QUARTER(o_orderdate) == 3:numeric & 
YEAR(o_orderdate) == 1993:numeric, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_commitdate < l_receiptdate, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index 4ecd8c14c..6e4183624 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,10 +1,10 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(DEFAULT_TO(sum_value, 0:numeric)):desc_last]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, 
columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -12,6 +12,6 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:n FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + 
JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q7.txt b/tests/test_plan_refsols/tpch_q7.txt index 14542215b..813d42b01 100644 --- a/tests/test_plan_refsols/tpch_q7.txt +++ b/tests/test_plan_refsols/tpch_q7.txt @@ -1,14 +1,14 @@ ROOT(columns=[('SUPP_NATION', supp_nation), ('CUST_NATION', n_name), ('L_YEAR', l_year), ('REVENUE', DEFAULT_TO(sum_volume, 0:numeric))], orderings=[(supp_nation):asc_first, (n_name):asc_first, (l_year):asc_first]) AGGREGATE(keys={'l_year': YEAR(l_shipdate), 'n_name': n_name, 'supp_nation': supp_nation}, aggregations={'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey & t0.n_name == 'FRANCE':string & t1.n_name == 'GERMANY':string | t0.n_name == 'GERMANY':string & t1.n_name == 'FRANCE':string, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supp_nation': t0.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey & t0.n_name == 'FRANCE':string & t1.n_name == 'GERMANY':string | t0.n_name == 'GERMANY':string & t1.n_name == 'FRANCE':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supp_nation': t0.n_name}) + 
JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q8.txt b/tests/test_plan_refsols/tpch_q8.txt index e19874147..f27de3c5c 100644 --- a/tests/test_plan_refsols/tpch_q8.txt +++ b/tests/test_plan_refsols/tpch_q8.txt @@ -1,20 +1,20 @@ ROOT(columns=[('O_YEAR', O_YEAR), ('MKT_SHARE', DEFAULT_TO(sum_brazil_volume, 0:numeric) / DEFAULT_TO(sum_volume, 0:numeric))], orderings=[]) AGGREGATE(keys={'O_YEAR': YEAR(o_orderdate)}, aggregations={'sum_brazil_volume': SUM(IFF(n_name == 'BRAZIL':string, l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': t0.o_orderdate}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) FILTER(condition=p_type == 'ECONOMY ANODIZED STEEL':string, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) FILTER(condition=ISIN(YEAR(o_orderdate), [1995, 1996]:array[unknown]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 
'AMERICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q9.txt b/tests/test_plan_refsols/tpch_q9.txt index b82a25527..d032c5bc1 100644 --- a/tests/test_plan_refsols/tpch_q9.txt +++ b/tests/test_plan_refsols/tpch_q9.txt @@ -1,13 +1,13 @@ ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(n_name):asc_first, (o_year):desc_last], limit=10:numeric) AGGREGATE(keys={'n_name': n_name, 'o_year': YEAR(o_orderdate)}, aggregations={'sum_value': SUM(l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': 
t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) FILTER(condition=CONTAINS(p_name, 'green':string), 
columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/triple_partition.txt b/tests/test_plan_refsols/triple_partition.txt index 3d481e5fb..067b2808d 100644 --- a/tests/test_plan_refsols/triple_partition.txt +++ b/tests/test_plan_refsols/triple_partition.txt @@ -2,24 +2,24 @@ ROOT(columns=[('region', supp_region), ('avgpct', avg_percentage)], orderings=[( AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(100.0:numeric * max_n_instances / sum_n_instances)}) AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': SUM(n_instances)}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_instances': t0.n_instances, 'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_instances': t0.n_instances, 'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) AGGREGATE(keys={'o_custkey': o_custkey, 'p_type': p_type, 'r_name': r_name}, aggregations={'n_instances': COUNT()}) 
- JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_regionkey == 
t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/various_aggfuncs_simple.txt b/tests/test_plan_refsols/various_aggfuncs_simple.txt index 49fed6fa7..c98d2d967 100644 --- a/tests/test_plan_refsols/various_aggfuncs_simple.txt +++ 
b/tests/test_plan_refsols/various_aggfuncs_simple.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', DEFAULT_TO(avg_c_acctbal, 0:numeric)), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal_1': COUNT(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'min_c_acctbal_1': MIN(c_acctbal), 'n_rows_1': COUNT(), 'sum_c_acctbal_1': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_1.txt b/tests/test_plan_refsols/window_filter_order_1.txt index af0c8e206..3e326b484 100644 --- a/tests/test_plan_refsols/window_filter_order_1.txt +++ b/tests/test_plan_refsols/window_filter_order_1.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index 28100ecde..dad30206d 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_totalprice': t0.o_totalprice}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_2.txt b/tests/test_plan_refsols/window_filter_order_2.txt index 
af0c8e206..3e326b484 100644 --- a/tests/test_plan_refsols/window_filter_order_2.txt +++ b/tests/test_plan_refsols/window_filter_order_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_3.txt b/tests/test_plan_refsols/window_filter_order_3.txt index af0c8e206..3e326b484 100644 --- a/tests/test_plan_refsols/window_filter_order_3.txt +++ b/tests/test_plan_refsols/window_filter_order_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_4.txt b/tests/test_plan_refsols/window_filter_order_4.txt index 000297593..f4240ff57 100644 --- a/tests/test_plan_refsols/window_filter_order_4.txt +++ b/tests/test_plan_refsols/window_filter_order_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_5.txt b/tests/test_plan_refsols/window_filter_order_5.txt index 71a43eb3f..3aff8fad4 100644 --- 
a/tests/test_plan_refsols/window_filter_order_5.txt +++ b/tests/test_plan_refsols/window_filter_order_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=c_acctbal < RELAVG(args=[DEFAULT_TO(c_acctbal, 0:numeric)], partition=[], order=[]) & PRESENT(expr_0), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'expr_0': t1.expr_0}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'expr_0': t1.expr_0}) FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'expr_0': 1:numeric}) diff --git a/tests/test_plan_refsols/window_filter_order_6.txt b/tests/test_plan_refsols/window_filter_order_6.txt index 71a43eb3f..3aff8fad4 100644 --- a/tests/test_plan_refsols/window_filter_order_6.txt +++ b/tests/test_plan_refsols/window_filter_order_6.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=c_acctbal < RELAVG(args=[DEFAULT_TO(c_acctbal, 0:numeric)], partition=[], order=[]) & PRESENT(expr_0), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'expr_0': t1.expr_0}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'expr_0': t1.expr_0}) FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'expr_0': 
1:numeric}) diff --git a/tests/test_plan_refsols/window_filter_order_7.txt b/tests/test_plan_refsols/window_filter_order_7.txt index db0c030a8..4d41e7023 100644 --- a/tests/test_plan_refsols/window_filter_order_7.txt +++ b/tests/test_plan_refsols/window_filter_order_7.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=c_acctbal < RELAVG(args=[c_acctbal], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal}) FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt index 8a6c6d52e..30462c7cd 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=c_acctbal < RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & ABSENT(n_rows), columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_rows': 
t1.n_rows}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_9.txt b/tests/test_plan_refsols/window_filter_order_9.txt index 0a1837981..642f47f0a 100644 --- a/tests/test_plan_refsols/window_filter_order_9.txt +++ b/tests/test_plan_refsols/window_filter_order_9.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[total_spent], partition=[], order=[]) & ABSENT(expr_0), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'expr_0': t1.expr_0, 'o_totalprice': t0.o_totalprice, 'total_spent': t1.total_spent}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'expr_0': t1.expr_0, 'o_totalprice': t0.o_totalprice, 'total_spent': t1.total_spent}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) PROJECT(columns={'c_custkey': c_custkey, 'expr_0': 1:numeric, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_o_totalprice': t1.sum_o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsize.txt b/tests/test_plan_refsols/window_sliding_frame_relsize.txt index ddf6fd292..c3c89419b 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsize.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsize.txt @@ -1,4 +1,4 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w2', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w3', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w6', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w7', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))), ('w8', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsum.txt b/tests/test_plan_refsols/window_sliding_frame_relsum.txt index 5b0a9e60f..dbbba0f08 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsum.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsum.txt @@ -1,4 +1,4 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w2', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w3', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w6', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w7', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))), ('w8', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) + JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': 
t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/year_month_nation_orders.txt b/tests/test_plan_refsols/year_month_nation_orders.txt index b91e4cd3c..8af9a411d 100644 --- a/tests/test_plan_refsols/year_month_nation_orders.txt +++ b/tests/test_plan_refsols/year_month_nation_orders.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', n_name), ('order_year', order_year), ('order_month', order_month), ('n_orders', n_orders)], orderings=[(n_orders):desc_last], limit=5:numeric) AGGREGATE(keys={'n_name': n_name, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}, aggregations={'n_orders': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': 
r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) From 65697e89cfd9f1652566390c493ad2b1a3004829 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 25 Aug 2025 14:23:59 -0400 Subject: [PATCH 088/143] Added reverse cardinality based column pruning [RUN CI] [RUN MYSQL] --- pydough/conversion/filter_pushdown.py | 16 +++-- .../relational_nodes/column_pruner.py | 67 +++++++++++++------ .../access_partition_child_after_filter.txt | 6 +- ..._partition_child_filter_backref_filter.txt | 2 +- .../agg_parts_by_type_backref_global.txt | 2 +- .../aggregation_analytics_1.txt | 4 +- .../aggregation_analytics_2.txt | 2 +- .../aggregation_analytics_3.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_4.txt | 4 +- tests/test_plan_refsols/common_prefix_ag.txt | 4 +- tests/test_plan_refsols/common_prefix_ah.txt | 2 +- tests/test_plan_refsols/common_prefix_ai.txt | 2 +- tests/test_plan_refsols/common_prefix_aj.txt | 4 +- tests/test_plan_refsols/common_prefix_ak.txt | 4 +- tests/test_plan_refsols/common_prefix_al.txt | 4 +- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_an.txt | 4 +- tests/test_plan_refsols/common_prefix_ao.txt | 2 +- tests/test_plan_refsols/common_prefix_l.txt | 2 +- tests/test_plan_refsols/common_prefix_s.txt | 2 +- tests/test_plan_refsols/common_prefix_t.txt | 2 +- tests/test_plan_refsols/common_prefix_u.txt | 2 +- tests/test_plan_refsols/correl_11.txt | 2 +- tests/test_plan_refsols/correl_12.txt | 2 +- tests/test_plan_refsols/correl_17.txt | 2 +- tests/test_plan_refsols/correl_18.txt | 2 +- tests/test_plan_refsols/correl_19.txt | 4 +- tests/test_plan_refsols/correl_21.txt | 2 +- tests/test_plan_refsols/correl_22.txt | 2 +- tests/test_plan_refsols/correl_23.txt | 2 +- 
tests/test_plan_refsols/correl_24.txt | 2 +- tests/test_plan_refsols/correl_25.txt | 2 +- tests/test_plan_refsols/correl_27.txt | 4 +- tests/test_plan_refsols/correl_28.txt | 4 +- tests/test_plan_refsols/correl_29.txt | 8 +-- tests/test_plan_refsols/correl_3.txt | 2 +- tests/test_plan_refsols/correl_34.txt | 2 +- tests/test_plan_refsols/correl_36.txt | 56 +++++++--------- tests/test_plan_refsols/correl_4.txt | 2 +- tests/test_plan_refsols/correl_5.txt | 4 +- tests/test_plan_refsols/double_cross.txt | 4 +- .../epoch_culture_events_info.txt | 10 +-- .../epoch_events_per_season.txt | 2 +- .../epoch_intra_season_searches.txt | 6 +- .../epoch_num_predawn_cold_war.txt | 4 +- ...ping_event_search_other_users_per_user.txt | 2 +- .../epoch_pct_searches_per_tod.txt | 2 +- .../epoch_search_results_by_tod.txt | 2 +- .../test_plan_refsols/join_region_nations.txt | 4 +- .../join_region_nations_customers.txt | 6 +- .../lineitem_regional_shipments2.txt | 2 +- .../lines_german_supplier_economy_part.txt | 2 +- .../month_year_sliding_windows.txt | 14 ++-- .../mostly_positive_accounts_per_nation1.txt | 2 +- .../mostly_positive_accounts_per_nation2.txt | 2 +- .../mostly_positive_accounts_per_nation3.txt | 2 +- .../multi_partition_access_2.txt | 2 +- .../multi_partition_access_3.txt | 19 ++---- .../multi_partition_access_4.txt | 2 +- .../multi_partition_access_5.txt | 14 ++-- .../multi_partition_access_6.txt | 48 +++---------- .../nation_name_contains_region_name.txt | 2 +- .../odate_and_rdate_avggap.txt | 2 +- .../order_by_before_join.txt | 4 +- tests/test_plan_refsols/pagerank_a1.txt | 2 +- tests/test_plan_refsols/pagerank_a2.txt | 2 +- tests/test_plan_refsols/pagerank_a6.txt | 2 +- tests/test_plan_refsols/pagerank_b3.txt | 2 +- tests/test_plan_refsols/pagerank_c4.txt | 2 +- tests/test_plan_refsols/pagerank_d5.txt | 2 +- tests/test_plan_refsols/pagerank_h8.txt | 2 +- tests/test_plan_refsols/part_cross_part_a.txt | 2 +- tests/test_plan_refsols/part_cross_part_b.txt | 4 +- 
tests/test_plan_refsols/part_cross_part_c.txt | 4 +- .../percentile_customers_per_region.txt | 4 +- .../quantile_function_test_1.txt | 6 +- .../rank_customers_per_region.txt | 4 +- .../rank_nations_per_region_by_customers.txt | 4 +- .../region_nation_window_aggs.txt | 6 +- .../regional_suppliers_percentile.txt | 4 +- tests/test_plan_refsols/simple_cross_10.txt | 4 +- tests/test_plan_refsols/simple_cross_11.txt | 2 +- tests/test_plan_refsols/simple_cross_2.txt | 2 +- tests/test_plan_refsols/simple_cross_4.txt | 2 +- tests/test_plan_refsols/simple_cross_6.txt | 2 +- tests/test_plan_refsols/simple_cross_7.txt | 2 +- tests/test_plan_refsols/simple_cross_8.txt | 4 +- tests/test_plan_refsols/simple_cross_9.txt | 4 +- ...chnograph_country_combination_analysis.txt | 2 +- ...umulative_incident_rate_goldcopperstar.txt | 6 +- ..._year_cumulative_incident_rate_overall.txt | 4 +- tests/test_plan_refsols/tpch_q19.txt | 2 +- tests/test_plan_refsols/tpch_q21.txt | 2 +- tests/test_plan_refsols/tpch_q22.txt | 4 +- tests/test_plan_refsols/tpch_q3.txt | 2 +- tests/test_plan_refsols/tpch_q7.txt | 4 +- tests/test_plan_refsols/tpch_q8.txt | 4 +- .../window_sliding_frame_relsize.txt | 4 +- .../window_sliding_frame_relsum.txt | 4 +- tests/test_pydough_from_string.py | 1 + tests/test_sql_refsols/correl_36_sqlite.sql | 23 ++----- .../test_sql_refsols/quantile_test_1_ansi.sql | 9 ++- .../quantile_test_1_mysql.sql | 11 ++- .../quantile_test_1_sqlite.sql | 11 ++- .../window_functions_ansi.sql | 4 +- .../window_functions_mysql.sql | 4 +- .../window_functions_sqlite.sql | 4 +- .../window_sliding_frame_relsize_ansi.sql | 24 +++---- .../window_sliding_frame_relsize_mysql.sql | 24 +++---- .../window_sliding_frame_relsize_sqlite.sql | 24 +++---- .../window_sliding_frame_relsum_ansi.sql | 24 +++---- .../window_sliding_frame_relsum_mysql.sql | 24 +++---- .../window_sliding_frame_relsum_sqlite.sql | 24 +++---- 115 files changed, 324 insertions(+), 413 deletions(-) diff --git 
a/pydough/conversion/filter_pushdown.py b/pydough/conversion/filter_pushdown.py index c08be33d2..e247bd132 100644 --- a/pydough/conversion/filter_pushdown.py +++ b/pydough/conversion/filter_pushdown.py @@ -185,6 +185,7 @@ def visit_join(self, join: Join) -> RelationalNode: # The join type, cardinality, and inputs for the output join node. join_type: JoinType = join.join_type cardinality: JoinCardinality = join.cardinality + reverse_cardinality: JoinCardinality = join.reverse_cardinality new_inputs: list[RelationalNode] = [] # If the join type is LEFT or SEMI but the condition is TRUE, convert it @@ -239,10 +240,15 @@ def visit_join(self, join: Join) -> RelationalNode: remaining_filters, lambda expr: only_references_columns(expr, input_cols[idx]), ) - # Ensure that if any filter is pushed into an input (besides - # the first input) that the join is marked as filtering. - if len(pushable_filters) > 0 and idx > 0: - cardinality = join.cardinality.add_filter() + # Ensure that if any filter is pushed into an input, the + # corresponding join cardinality is updated to reflect that a filter + # has been applied. 
+ if len(pushable_filters) > 0: + if idx == 1: + cardinality = join.cardinality.add_filter() + else: + reverse_cardinality = reverse_cardinality.add_filter() + # Do the same pushable_filters = { expr.accept_shuttle(transposer) for expr in pushable_filters } @@ -270,6 +276,7 @@ def visit_join(self, join: Join) -> RelationalNode: else: new_conjunction.add(join._condition) cardinality = join.cardinality.add_filter() + reverse_cardinality = join.reverse_cardinality.add_filter() join._condition = RelationalExpression.form_conjunction( sorted(new_conjunction, key=repr) ) @@ -280,6 +287,7 @@ def visit_join(self, join: Join) -> RelationalNode: new_node = join.copy(inputs=new_inputs) assert isinstance(new_node, Join) new_node.cardinality = cardinality + new_node.reverse_cardinality = reverse_cardinality new_node.join_type = join_type return build_filter(new_node, remaining_filters) diff --git a/pydough/relational/relational_nodes/column_pruner.py b/pydough/relational/relational_nodes/column_pruner.py index 1d4dc28c7..a49f05b31 100644 --- a/pydough/relational/relational_nodes/column_pruner.py +++ b/pydough/relational/relational_nodes/column_pruner.py @@ -150,31 +150,56 @@ def _prune_node_columns( # Special case: replace LEFT join where RHS is unused with LHS (only # possible if the join is used to bring 1:1 data into the rows of the # LHS, which is unecessary if no data is being brought). Also do the - # same for inner joins that meet certain criteria. - if isinstance(output, Join) and ( - (output.join_type == JoinType.LEFT) - or ( + # same for inner joins that meet certain criteria. Do the same with + # inner joins where the left side is unused and the data is singular + # and non-filtering with regards to the right side. 
+ if isinstance(output, Join): + prune_left: bool = ( + output.join_type == JoinType.INNER + and output.reverse_cardinality == JoinCardinality.SINGULAR_ACCESS + ) + prune_right: bool = (output.join_type == JoinType.LEFT) or ( output.join_type == JoinType.INNER and output.cardinality == JoinCardinality.SINGULAR_ACCESS ) - ): - uses_rhs: bool = False - for column in output.columns.values(): - if ( - isinstance(column, ColumnReference) - and column.input_name == output.default_input_aliases[1] - ): - uses_rhs = True - break - if not uses_rhs: + if prune_left or prune_right: + uses_lhs: bool = False + uses_rhs: bool = False + for column in output.columns.values(): + if ( + isinstance(column, ColumnReference) + and column.input_name == output.default_input_aliases[0] + ): + uses_lhs = True + if ( + isinstance(column, ColumnReference) + and column.input_name == output.default_input_aliases[1] + ): + uses_rhs = True + if uses_lhs and uses_rhs: + break + new_columns: dict[str, RelationalExpression] = {} - for column_name, column_val in output.columns.items(): - assert isinstance(column_val, ColumnReference) - new_columns[column_name] = output.inputs[0].columns[column_val.name] - if isinstance(output.inputs[0], Aggregate): - for key in output.inputs[0].keys: - new_columns[key] = output.inputs[0].keys[key] - output = output.inputs[0].copy(columns=new_columns) + if prune_right and not uses_rhs: + for column_name, column_val in output.columns.items(): + assert isinstance(column_val, ColumnReference) + new_columns[column_name] = output.inputs[0].columns[ + column_val.name + ] + if isinstance(output.inputs[0], Aggregate): + for key in output.inputs[0].keys: + new_columns[key] = output.inputs[0].keys[key] + output = output.inputs[0].copy(columns=new_columns) + elif prune_left and not uses_lhs: + for column_name, column_val in output.columns.items(): + assert isinstance(column_val, ColumnReference) + new_columns[column_name] = output.inputs[1].columns[ + column_val.name + ] + if 
isinstance(output.inputs[1], Aggregate): + for key in output.inputs[1].keys: + new_columns[key] = output.inputs[1].keys[key] + output = output.inputs[1].copy(columns=new_columns) return output, correl_refs diff --git a/tests/test_plan_refsols/access_partition_child_after_filter.txt b/tests/test_plan_refsols/access_partition_child_after_filter.txt index 76c2d9652..e0889ae05 100644 --- a/tests/test_plan_refsols/access_partition_child_after_filter.txt +++ b/tests/test_plan_refsols/access_partition_child_after_filter.txt @@ -1,6 +1,2 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price', p_retailprice)], orderings=[]) - JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) - FILTER(condition=avg_p_retailprice > 27.5:numeric, columns={'p_type': p_type}) - AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt b/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt index 9527b7c1a..fd9934912 100644 --- a/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt +++ b/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price', p_retailprice)], orderings=[]) - JOIN(condition=t1.p_retailprice < t0.avg_price & t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': 
t1.p_retailprice, 'p_type': t1.p_type}) + JOIN(condition=t1.p_retailprice < t0.avg_price & t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt b/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt index 3a3ff6bd6..96764d11e 100644 --- a/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt +++ b/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_type', p_type), ('percentage_of_parts', n_rows / total_num_parts), ('avg_price', avg_p_retailprice)], orderings=[]) - JOIN(condition=t1.avg_p_retailprice >= t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 'p_type': t1.p_type, 'total_num_parts': t0.total_num_parts}) + JOIN(condition=t1.avg_p_retailprice >= t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 'p_type': t1.p_type, 'total_num_parts': t0.total_num_parts}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice), 'total_num_parts': COUNT()}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/aggregation_analytics_1.txt b/tests/test_plan_refsols/aggregation_analytics_1.txt index 58e0deb0c..5313d48d0 100644 --- 
a/tests/test_plan_refsols/aggregation_analytics_1.txt +++ b/tests/test_plan_refsols/aggregation_analytics_1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=8:numeric) JOIN(condition=t0.ps_partkey == t1.ps_partkey & t0.ps_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t0.p_name, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) @@ -9,7 +9,7 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_ SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 
'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index cb721fe2a..2ef53d9c2 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -2,7 +2,7 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_ JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': 
t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 8d3788bf3..339aaf5d5 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -2,7 +2,7 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_reve JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index a0a95a823..452929916 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -2,8 +2,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': 
t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index a0a95a823..452929916 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -2,8 +2,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 
'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt b/tests/test_plan_refsols/bad_child_reuse_4.txt index da2b4c0c0..edcdf502e 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,8 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index a3e99e82a..0ac56433a 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', 
n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ 
FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index 1e859202f..8fa8c59a8 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 
0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index 4448ee711..38b744b45 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ 
-1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index ca09a6735..d610b63d5 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', 
anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 
'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index ab4e235dd..733673099 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], 
orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == 
t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index f7b11e162..f674f8512 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -4,9 +4,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': 
t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 1ed0ccd60..eb6b60f1f 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -3,9 +3,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numer LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, 
aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) FILTER(condition=sum_agg_3 > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 396066d64..cfab97d0b 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -3,9 +3,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 
91bfe0dcf..508b413ff 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -4,7 +4,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric) LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 166af0404..c03ee50fe 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 85f114b4e..2dbcd5430 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', n_rows), ('most_recent_order_distinct', ndistinct_l_suppkey)], orderings=[(c_name):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index 4592b0af2..873834947 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': 
c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index 8bb6e9ff8..1a4c5e05e 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_11.txt b/tests/test_plan_refsols/correl_11.txt index b2cdc198b..03eea0352 100644 --- a/tests/test_plan_refsols/correl_11.txt +++ b/tests/test_plan_refsols/correl_11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('brand', p_brand)], orderings=[(p_brand):asc_first]) AGGREGATE(keys={'p_brand': p_brand}, aggregations={}) - JOIN(condition=t0.p_brand == 
t1.p_brand & t1.p_retailprice > 1.4:numeric * t0.avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_brand': t0.p_brand}) + JOIN(condition=t0.p_brand == t1.p_brand & t1.p_retailprice > 1.4:numeric * t0.avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_brand': t0.p_brand}) AGGREGATE(keys={'p_brand': p_brand}, aggregations={'avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_12.txt b/tests/test_plan_refsols/correl_12.txt index 3fceabdc2..dcf0d68f6 100644 --- a/tests/test_plan_refsols/correl_12.txt +++ b/tests/test_plan_refsols/correl_12.txt @@ -1,6 +1,6 @@ ROOT(columns=[('brand', p_brand)], orderings=[(p_brand):asc_first]) AGGREGATE(keys={'p_brand': p_brand}, aggregations={}) - JOIN(condition=t1.p_retailprice < t0.global_avg_price & t0.p_brand == t1.p_brand & t1.p_retailprice > t0.brand_avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_brand': t0.p_brand}) + JOIN(condition=t1.p_retailprice < t0.global_avg_price & t0.p_brand == t1.p_brand & t1.p_retailprice > t0.brand_avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_brand': t0.p_brand}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'brand_avg_price': t1.brand_avg_price, 'global_avg_price': t0.global_avg_price, 'p_brand': t1.p_brand}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index 7c1c086c4..a62a84c23 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ 
b/tests/test_plan_refsols/correl_17.txt @@ -1,4 +1,4 @@ ROOT(columns=[('fullname', JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name)))], orderings=[(JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name))):asc_first]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 75910fb98..ff21980c3 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', sum_n_above_avg)], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_n_above_avg': COUNT()}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_totalprice >= 0.5:numeric * DEFAULT_TO(t0.sum_o_totalprice, 0:numeric), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_totalprice >= 0.5:numeric * DEFAULT_TO(t0.sum_o_totalprice, 0:numeric), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': 
o_totalprice}) diff --git a/tests/test_plan_refsols/correl_19.txt b/tests/test_plan_refsols/correl_19.txt index 453727b40..99566c0f2 100644 --- a/tests/test_plan_refsols/correl_19.txt +++ b/tests/test_plan_refsols/correl_19.txt @@ -1,7 +1,7 @@ ROOT(columns=[('supplier_name', supplier_name), ('n_super_cust', n_super_cust)], orderings=[(n_super_cust):desc_last], limit=5:numeric) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'n_super_cust': COUNT(), 'supplier_name': ANYTHING(s_name)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_21.txt b/tests/test_plan_refsols/correl_21.txt index 8a6cdde8d..2173cadb5 100644 --- a/tests/test_plan_refsols/correl_21.txt +++ b/tests/test_plan_refsols/correl_21.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) AGGREGATE(keys={}, 
aggregations={'n_sizes': COUNT()}) - JOIN(condition=t1.n_rows > t0.avg_n_parts, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + JOIN(condition=t1.n_rows > t0.avg_n_parts, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) AGGREGATE(keys={}, aggregations={'avg_n_parts': AVG(n_parts)}) AGGREGATE(keys={'p_size': p_size}, aggregations={'n_parts': COUNT()}) SCAN(table=tpch.PART, columns={'p_size': p_size}) diff --git a/tests/test_plan_refsols/correl_22.txt b/tests/test_plan_refsols/correl_22.txt index aad7e12c4..2d7675c4e 100644 --- a/tests/test_plan_refsols/correl_22.txt +++ b/tests/test_plan_refsols/correl_22.txt @@ -1,6 +1,6 @@ ROOT(columns=[('container', p_container), ('n_types', n_types)], orderings=[(n_types):desc_last, (p_container):asc_first], limit=5:numeric) AGGREGATE(keys={'p_container': p_container}, aggregations={'n_types': COUNT()}) - JOIN(condition=t1.avg_p_retailprice > t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_container': t1.p_container}) + JOIN(condition=t1.avg_p_retailprice > t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_container': t1.p_container}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) AGGREGATE(keys={'p_container': p_container, 'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/correl_23.txt b/tests/test_plan_refsols/correl_23.txt index 8622ac925..1862339e7 100644 --- a/tests/test_plan_refsols/correl_23.txt +++ b/tests/test_plan_refsols/correl_23.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_sizes': COUNT()}) - JOIN(condition=t1.n_rows > t0.avg_n_combo, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={}) + JOIN(condition=t1.n_rows > t0.avg_n_combo, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) AGGREGATE(keys={}, aggregations={'avg_n_combo': AVG(n_combos)}) AGGREGATE(keys={'p_size': p_size}, aggregations={'n_combos': COUNT()}) AGGREGATE(keys={'p_container': p_container, 'p_size': p_size, 'p_type': p_type}, aggregations={}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index d5b649cbd..f93b3feb8 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,6 +1,6 @@ ROOT(columns=[('year', year), ('month', month), ('n_orders_in_range', n_orders_in_range)], orderings=[(year):asc_first, (month):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_orders_in_range': COUNT()}) - JOIN(condition=t0.month == MONTH(t1.o_orderdate) & t0.year == YEAR(t1.o_orderdate) & MONOTONIC(t0.prev_month_avg_price, t1.o_totalprice, t0.avg_o_totalprice) | MONOTONIC(t0.avg_o_totalprice, t1.o_totalprice, t0.prev_month_avg_price), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'year': t0.year}) + JOIN(condition=t0.month == MONTH(t1.o_orderdate) & t0.year == YEAR(t1.o_orderdate) & MONOTONIC(t0.prev_month_avg_price, t1.o_totalprice, t0.avg_o_totalprice) | MONOTONIC(t0.avg_o_totalprice, t1.o_totalprice, t0.prev_month_avg_price), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) FILTER(condition=YEAR(o_orderdate) < 1994:numeric, columns={'o_orderdate': 
o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_25.txt b/tests/test_plan_refsols/correl_25.txt index e71563e9c..0c7b3fa53 100644 --- a/tests/test_plan_refsols/correl_25.txt +++ b/tests/test_plan_refsols/correl_25.txt @@ -2,7 +2,7 @@ ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', r_regio AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name), 'n_urgent_semi_domestic_rail_orders': NDISTINCT(l_orderkey)}) JOIN(condition=t1.n_name != t0.n_name & t0.l_suppkey == t1.s_suppkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.n_nationkey 
== t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 4e8bf9fda..05490b4f1 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -3,8 +3,8 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 
'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index a4726946f..862466ad1 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -3,8 +3,8 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index fac5389cf..78b655da0 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,9 +1,9 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_rows_1), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 
'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) @@ -12,7 +12,7 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_n AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) diff --git a/tests/test_plan_refsols/correl_3.txt b/tests/test_plan_refsols/correl_3.txt index efeaf72d2..475b9a565 100644 --- a/tests/test_plan_refsols/correl_3.txt +++ b/tests/test_plan_refsols/correl_3.txt @@ -4,7 +4,7 @@ ROOT(columns=[('region_name', r_name), ('n_nations', DEFAULT_TO(n_rows, 0:numeri AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) 
FILTER(condition=r_regionkey == anything_n_regionkey, columns={'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey)}) - JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index e10964b38..eba9a0a50 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ b/tests/test_plan_refsols/correl_34.txt @@ -2,7 +2,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={}) FILTER(condition=l_orderkey == o_orderkey & ps_partkey == l_partkey & ps_suppkey == l_suppkey & o_totalprice > RELAVG(args=[o_totalprice], partition=[l_linenumber, l_orderkey, ps_partkey, ps_suppkey], order=[]) | RELSIZE(args=[], 
partition=[l_partkey, l_suppkey], order=[]) == 1:numeric, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_36.txt b/tests/test_plan_refsols/correl_36.txt index 
bb518918c..6e6606de9 100644 --- a/tests/test_plan_refsols/correl_36.txt +++ b/tests/test_plan_refsols/correl_36.txt @@ -1,33 +1,27 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.key_12, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey}) - FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'key_12': key_12, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey}, aggregations={}) - JOIN(condition=t0.p_type == t1.p_type & t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t1.l_partkey, 'p_type': t0.p_type}) - JOIN(condition=t0.c_custkey == t1.o_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_12': t0.o_orderkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey, 'p_type': 
t0.p_type}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey & t1.c_nationkey == t0.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) - FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) - SCAN(table=tpch.SUPPLIER, 
columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=QUARTER(l_shipdate) == 1:numeric & YEAR(l_shipdate) == 1997:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + AGGREGATE(keys={'key_12': key_12, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey}, aggregations={}) + JOIN(condition=t0.p_type == t1.p_type & t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t1.l_partkey, 'p_type': t0.p_type}) + JOIN(condition=t0.c_custkey == t1.o_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_12': t0.o_orderkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey, 'p_type': t0.p_type}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) + JOIN(condition=t0.o_custkey == t1.c_custkey & t1.c_nationkey == t0.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) + 
FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=QUARTER(l_shipdate) == 1:numeric & YEAR(l_shipdate) == 1997:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git 
a/tests/test_plan_refsols/correl_4.txt b/tests/test_plan_refsols/correl_4.txt index f8778c6d6..8f72568f4 100644 --- a/tests/test_plan_refsols/correl_4.txt +++ b/tests/test_plan_refsols/correl_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', n_name)], orderings=[(n_name):asc_first]) JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t1.c_acctbal <= t0.smallest_bal + 5.0:numeric & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t1.c_acctbal <= t0.smallest_bal + 5.0:numeric & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'smallest_bal': t0.smallest_bal}) AGGREGATE(keys={}, aggregations={'smallest_bal': MIN(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) diff --git a/tests/test_plan_refsols/correl_5.txt b/tests/test_plan_refsols/correl_5.txt index 1ae689fe3..8f28d21ea 100644 --- a/tests/test_plan_refsols/correl_5.txt +++ b/tests/test_plan_refsols/correl_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', name)], orderings=[(name):asc_first]) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'name': ANYTHING(r_name)}) - JOIN(condition=t1.s_acctbal <= t0.smallest_bal + 4.0:numeric & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'smallest_bal': t0.smallest_bal}) + JOIN(condition=t1.s_acctbal <= t0.smallest_bal + 4.0:numeric & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'smallest_bal': t0.smallest_bal}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey, 'smallest_bal': t0.smallest_bal}) AGGREGATE(keys={}, aggregations={'smallest_bal': MIN(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal}) diff --git a/tests/test_plan_refsols/double_cross.txt b/tests/test_plan_refsols/double_cross.txt index 0b5722202..52322ffa3 100644 --- a/tests/test_plan_refsols/double_cross.txt +++ b/tests/test_plan_refsols/double_cross.txt @@ -1,9 +1,9 @@ ROOT(columns=[('wk', ord_wk), ('n_lines', n_rows), ('n_orders', anything_n_orders), ('lpo', ROUND(RELSUM(args=[n_rows], partition=[], order=[(line_wk):asc_last], cumulative=True) / RELSUM(args=[anything_n_orders], partition=[], order=[(ord_wk):asc_last], cumulative=True), 4:numeric))], orderings=[(ord_wk):asc_first]) AGGREGATE(keys={'line_wk': DATEDIFF('week':string, min_date, l_receiptdate), 'ord_wk': ord_wk}, aggregations={'anything_n_orders': ANYTHING(n_orders), 'n_rows': COUNT()}) - JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.l_receiptdate) < 10:numeric & t0.ord_wk == DATEDIFF('week':string, t0.min_date, t1.l_receiptdate), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t1.l_receiptdate, 'min_date': t0.min_date, 'n_orders': t0.n_orders, 
'ord_wk': t0.ord_wk}) + JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.l_receiptdate) < 10:numeric & t0.ord_wk == DATEDIFF('week':string, t0.min_date, t1.l_receiptdate), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_receiptdate': t1.l_receiptdate, 'min_date': t0.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'min_date': t1.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) AGGREGATE(keys={'ord_wk': DATEDIFF('week':string, min_date, o_orderdate)}, aggregations={'n_orders': COUNT()}) - JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.o_orderdate) < 10:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'min_date': t0.min_date, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.o_orderdate) < 10:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'min_date': t0.min_date, 'o_orderdate': t1.o_orderdate}) AGGREGATE(keys={}, aggregations={'min_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) FILTER(condition=o_orderpriority == '1-URGENT':string & o_orderstatus == 'F':string, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index 63f0f6776..e26cfdcde 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,13 +1,13 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEAR(ev_dt)), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first], limit=6:numeric) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 
'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/epoch_events_per_season.txt b/tests/test_plan_refsols/epoch_events_per_season.txt index 3e35da6bd..7220d48f4 100644 --- a/tests/test_plan_refsols/epoch_events_per_season.txt +++ b/tests/test_plan_refsols/epoch_events_per_season.txt @@ -1,5 +1,5 @@ ROOT(columns=[('season_name', s_name), ('n_events', n_events)], orderings=[(n_events):desc_last, (s_name):asc_first]) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_events': COUNT()}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt}) diff --git 
a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 471d3e119..18a067096 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,8 +1,8 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) - JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) + JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 
's_month3': s_month3, 's_name': s_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) @@ -16,7 +16,7 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numer AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(name_9 == s_name)}) JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'name_9': t1.s_name, 's_name': t0.s_name}) JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt index 225fc5ac2..a3a22d030 100644 --- a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt +++ b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n_events', n_events)], orderings=[]) 
AGGREGATE(keys={}, aggregations={'n_events': COUNT()}) JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=t_name == 'Pre-Dawn':string, columns={'t_end_hour': t_end_hour, 't_start_hour': t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=er_name == 'Cold War':string, columns={'er_end_year': er_end_year, 'er_start_year': er_start_year}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt index 3c0b8dc3a..22b8030f5 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt @@ -3,7 +3,7 @@ ROOT(columns=[('user_name', anything_user_name), ('n_other_users', n_other_users 
JOIN(condition=t1.user_name != t0.user_name & t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) SCAN(table=EVENTS, columns={'ev_name': ev_name}) diff --git a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt index 88b48990a..25e99085d 100644 --- a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt +++ b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt @@ -1,5 +1,5 @@ ROOT(columns=[('tod', t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_start_hour': ANYTHING(t_start_hour), 'n_rows': COUNT()}) - 
JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) SCAN(table=SEARCHES, columns={'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_search_results_by_tod.txt b/tests/test_plan_refsols/epoch_search_results_by_tod.txt index a4f00ca05..67a223a35 100644 --- a/tests/test_plan_refsols/epoch_search_results_by_tod.txt +++ b/tests/test_plan_refsols/epoch_search_results_by_tod.txt @@ -1,5 +1,5 @@ ROOT(columns=[('tod', t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric)), ('avg_results', ROUND(avg_search_num_results, 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_start_hour': ANYTHING(t_start_hour), 'avg_search_num_results': AVG(search_num_results), 'n_rows': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'search_num_results': t1.search_num_results, 't_name': t0.t_name, 't_start_hour': t0.t_start_hour}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'search_num_results': t1.search_num_results, 't_name': t0.t_name, 't_start_hour': t0.t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) 
SCAN(table=SEARCHES, columns={'search_num_results': search_num_results, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/join_region_nations.txt b/tests/test_plan_refsols/join_region_nations.txt index c319d0a25..192f6778a 100644 --- a/tests/test_plan_refsols/join_region_nations.txt +++ b/tests/test_plan_refsols/join_region_nations.txt @@ -1,4 +1,2 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_region_nations_customers.txt b/tests/test_plan_refsols/join_region_nations_customers.txt index 03148c27c..8f39f219c 100644 --- a/tests/test_plan_refsols/join_region_nations_customers.txt +++ b/tests/test_plan_refsols/join_region_nations_customers.txt @@ -1,6 +1,2 @@ ROOT(columns=[('key', c_custkey), ('name', c_name), ('address', c_address), ('nation_key', c_nationkey), ('phone', c_phone), ('account_balance', c_acctbal), ('market_segment', c_mktsegment), ('comment', c_comment)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_comment': t1.c_comment, 'c_custkey': t1.c_custkey, 'c_mktsegment': t1.c_mktsegment, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'c_phone': t1.c_phone}) - JOIN(condition=t0.r_regionkey == 
t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/lineitem_regional_shipments2.txt b/tests/test_plan_refsols/lineitem_regional_shipments2.txt index b79ec8de3..f90e87c44 100644 --- a/tests/test_plan_refsols/lineitem_regional_shipments2.txt +++ b/tests/test_plan_refsols/lineitem_regional_shipments2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('rname', r_name), ('price', l_extendedprice)], orderings=[]) - JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) + JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'r_name': t1.r_name}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 
'l_suppkey': l_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'r_name': t1.r_name}) diff --git a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt index d8e5cd342..531abfce9 100644 --- a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt +++ b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt @@ -1,5 +1,5 @@ ROOT(columns=[('order_key', l_orderkey), ('ship_date', l_shipdate), ('extended_price', l_extendedprice)], orderings=[]) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_shipdate': t0.l_shipdate, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 
132a9adcc..b8ba05d61 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,11 +1,5 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) - FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year_1, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year_1}) - FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) - AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year_1': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_o_totalprice, 
0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_1):asc_last, (month_1):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_1):asc_last, (month_1):asc_last], default=0.0), columns={'month': month_1, 'year': year_1}) + AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year_1': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt index 84d08983f..e8baa1741 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': 
COUNT(s_suppkey)}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt index 6b901bb6d..55cdf7f5d 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', count_s_suppkey), ('total_suppliers', count_s_suppkey)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index 2ab7abe26..6198ebc29 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & 
DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 00be4afea..95593d4d1 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', cus_tick_typ_avg_shares), ('cust_tick_avg_shares', cust_tick_avg_shares), ('cust_avg_shares', cust_avg_shares)], orderings=[(sbTxId):asc_first]) JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType}) JOIN(condition=t0.sbTxCustId == 
t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) - JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index 2cd523e09..e2cf74fb0 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,19 +1,12 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) - JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) + JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'type_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) - AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpTickerId': t1.sbDpTickerId}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId}) - SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) - SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t0.sbTickerType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) + SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'ticker_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId}) - SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerSymbol': t0.sbTickerSymbol, 'sbTickerType': t0.sbTickerType}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol, 'sbTickerType': sbTickerType}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index 2efeb070b..ded2ae63f 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.cust_max_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.cust_ticker_max_shares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t1.sbTxShares < t0.cust_max_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.cust_ticker_max_shares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_max_shares': MAX(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 3356a5d74..da5336fa0 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ 
b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,13 +1,9 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', sum_n_ticker_type_trans_1), ('n_type_trans', sum_n_ticker_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) - JOIN(condition=t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans < 0.2:numeric & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans > 0.8:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': 
t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) + JOIN(condition=t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans < 0.2:numeric & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans > 0.8:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index e793349dc..871bb5b9f 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -1,47 
+1,17 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t1.n_ticker_type_trans == 1:numeric | t0.n_cust_type_trans == 1:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxId': t1.sbTxId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t1.n_ticker_type_trans == 1:numeric | t0.n_cust_type_trans == 1:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_cust_type_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - 
AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == 
t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/nation_name_contains_region_name.txt b/tests/test_plan_refsols/nation_name_contains_region_name.txt index 2244b257e..9d3c189f4 100644 --- a/tests/test_plan_refsols/nation_name_contains_region_name.txt +++ b/tests/test_plan_refsols/nation_name_contains_region_name.txt @@ -1,4 +1,4 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey & CONTAINS(t1.n_name, t0.r_name), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey & CONTAINS(t1.n_name, t0.r_name), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/odate_and_rdate_avggap.txt b/tests/test_plan_refsols/odate_and_rdate_avggap.txt index 84a9d4f04..9606c3407 100644 --- a/tests/test_plan_refsols/odate_and_rdate_avggap.txt +++ b/tests/test_plan_refsols/odate_and_rdate_avggap.txt @@ -1,6 +1,6 @@ ROOT(columns=[('avg_gap', avg_gap)], orderings=[]) AGGREGATE(keys={}, aggregations={'avg_gap': AVG(DATEDIFF('days':string, o_orderdate, SMALLEST(l_commitdate, l_receiptdate)))}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_commitdate': t0.l_commitdate, 'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_commitdate': t0.l_commitdate, 'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipmode': l_shipmode}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git 
a/tests/test_plan_refsols/order_by_before_join.txt b/tests/test_plan_refsols/order_by_before_join.txt index c319d0a25..192f6778a 100644 --- a/tests/test_plan_refsols/order_by_before_join.txt +++ b/tests/test_plan_refsols/order_by_before_join.txt @@ -1,4 +1,2 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt index d7c5bd416..5e682b661 100644 --- a/tests/test_plan_refsols/pagerank_a1.txt +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -4,7 +4,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0, 5:numeric))], ord JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': 
ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt index 9f476f893..25ca533af 100644 --- a/tests/test_plan_refsols/pagerank_a2.txt +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -8,7 +8,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_20, 5:numeric))], JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == 
t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt index e67d91867..f670d3451 100644 --- a/tests/test_plan_refsols/pagerank_a6.txt +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -24,7 +24,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_590, 5:numeric))], JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 
'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt index a55bcc47a..faf53451e 100644 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ b/tests/test_plan_refsols/pagerank_b3.txt @@ -12,7 +12,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_58, 5:numeric))], JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt index cf9b305ab..523047895 100644 --- a/tests/test_plan_refsols/pagerank_c4.txt +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -16,7 +16,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_134, 5:numeric))], JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': 
t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt index 382de6bab..b8ae5bde8 100644 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -20,7 +20,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_286, 5:numeric))], JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], 
order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_h8.txt b/tests/test_plan_refsols/pagerank_h8.txt index 8551440bf..91c7ce5f2 100644 --- a/tests/test_plan_refsols/pagerank_h8.txt +++ b/tests/test_plan_refsols/pagerank_h8.txt @@ -32,7 +32,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_2414, 5:numeric))] JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, 
columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/part_cross_part_a.txt b/tests/test_plan_refsols/part_cross_part_a.txt index c4402eb78..90ec89d6c 100644 --- a/tests/test_plan_refsols/part_cross_part_a.txt +++ b/tests/test_plan_refsols/part_cross_part_a.txt @@ -7,7 +7,7 @@ ROOT(columns=[('state', sbCustState), ('exchange', sbTickerExchange), ('n', DEFA SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId, 'sbCustState': sbCustState}) AGGREGATE(keys={'sbCustId': sbCustId, 'sbTickerExchange': sbTickerExchange}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTickerId & t1.sbTickerExchange == t0.sbTickerExchange, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange, 'sbTxTickerId': t1.sbTxTickerId}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) AGGREGATE(keys={'sbTickerExchange': sbTickerExchange}, aggregations={}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_plan_refsols/part_cross_part_b.txt b/tests/test_plan_refsols/part_cross_part_b.txt index f32335bba..6524835a9 100644 --- a/tests/test_plan_refsols/part_cross_part_b.txt +++ b/tests/test_plan_refsols/part_cross_part_b.txt @@ -7,8 +7,8 @@ ROOT(columns=[('state', sbCustState), ('month_of_year', 
month), ('n', RELSUM(arg FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) AGGREGATE(keys={'month': month, 'sbCustState': sbCustState}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) - JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/part_cross_part_c.txt b/tests/test_plan_refsols/part_cross_part_c.txt index 9062336ea..9b061b966 100644 --- a/tests/test_plan_refsols/part_cross_part_c.txt +++ b/tests/test_plan_refsols/part_cross_part_c.txt @@ -8,8 +8,8 @@ ROOT(columns=[('state', sbCustState), ('max_n', max_n)], orderings=[]) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) 
AGGREGATE(keys={'month': month, 'sbCustState': sbCustState}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) - JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/percentile_customers_per_region.txt b/tests/test_plan_refsols/percentile_customers_per_region.txt index 0000648c9..627b425c7 100644 --- a/tests/test_plan_refsols/percentile_customers_per_region.txt +++ b/tests/test_plan_refsols/percentile_customers_per_region.txt @@ -1,7 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(c_name):asc_first]) FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(c_acctbal):asc_last]) == 95:numeric & ENDSWITH(c_phone, '00':string), columns={'c_name': c_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/quantile_function_test_1.txt b/tests/test_plan_refsols/quantile_function_test_1.txt index c460cafe5..3472faf9c 100644 --- a/tests/test_plan_refsols/quantile_function_test_1.txt +++ b/tests/test_plan_refsols/quantile_function_test_1.txt @@ -1,6 +1,4 @@ ROOT(columns=[('seventieth_order_price', seventieth_order_price)], orderings=[]) AGGREGATE(keys={}, aggregations={'seventieth_order_price': QUANTILE(o_totalprice, 0.7:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_totalprice': t1.o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/rank_customers_per_region.txt b/tests/test_plan_refsols/rank_customers_per_region.txt index 272d3f0a6..b21f4c0b5 100644 --- 
a/tests/test_plan_refsols/rank_customers_per_region.txt +++ b/tests/test_plan_refsols/rank_customers_per_region.txt @@ -1,6 +1,4 @@ ROOT(columns=[('nation_name', n_name), ('name', c_name), ('cust_rank', RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first], allow_ties=True, dense=True))], orderings=[]) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 60f1383e8..610f76d3a 100644 --- a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,7 +1,5 @@ ROOT(columns=[('name', n_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first]))], orderings=[(RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])):asc_first], limit=5:numeric) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, 
type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/region_nation_window_aggs.txt b/tests/test_plan_refsols/region_nation_window_aggs.txt index 0f1fb8476..219f9e2a2 100644 --- a/tests/test_plan_refsols/region_nation_window_aggs.txt +++ b/tests/test_plan_refsols/region_nation_window_aggs.txt @@ -1,5 +1,3 @@ ROOT(columns=[('nation_name', n_name), ('key_sum', RELSUM(args=[n_nationkey], partition=[n_regionkey], order=[])), ('key_avg', RELAVG(args=[n_nationkey], partition=[n_regionkey], order=[])), ('n_short_comment', RELCOUNT(args=[KEEP_IF(n_comment, LENGTH(n_comment) < 75:numeric)], partition=[n_regionkey], order=[])), ('n_nations', RELSIZE(args=[], partition=[n_regionkey], order=[]))], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) + FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/regional_suppliers_percentile.txt b/tests/test_plan_refsols/regional_suppliers_percentile.txt index d968b9e59..93b25c240 100644 --- a/tests/test_plan_refsols/regional_suppliers_percentile.txt +++ b/tests/test_plan_refsols/regional_suppliers_percentile.txt @@ -2,9 +2,7 @@ ROOT(columns=[('name', s_name)], orderings=[]) FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(n_rows):asc_last, (s_name):asc_last], n_buckets=1000) == 1000:numeric, columns={'s_name': s_name}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_name': t1.s_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.PARTSUPP, 
columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/simple_cross_10.txt b/tests/test_plan_refsols/simple_cross_10.txt index 4113ecab2..bdf4299bd 100644 --- a/tests/test_plan_refsols/simple_cross_10.txt +++ b/tests/test_plan_refsols/simple_cross_10.txt @@ -2,8 +2,8 @@ ROOT(columns=[('region_name', r_name), ('n_other_nations', DEFAULT_TO(n_rows, 0: JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_regionkey': t0.r_regionkey}) - JOIN(condition=t1.r_name != t0.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t1.r_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r_regionkey': t0.r_regionkey}) + JOIN(condition=t1.r_name != t0.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t1.r_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_11.txt 
b/tests/test_plan_refsols/simple_cross_11.txt index 27cff2d37..4b7aa81fd 100644 --- a/tests/test_plan_refsols/simple_cross_11.txt +++ b/tests/test_plan_refsols/simple_cross_11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_orderdate == t1.min_date, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + JOIN(condition=t0.o_orderdate == t1.min_date, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={}, aggregations={'min_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/simple_cross_2.txt b/tests/test_plan_refsols/simple_cross_2.txt index a5f30f5fa..bf441b987 100644 --- a/tests/test_plan_refsols/simple_cross_2.txt +++ b/tests/test_plan_refsols/simple_cross_2.txt @@ -1,4 +1,4 @@ ROOT(columns=[('r1', r_name), ('r2', r2)], orderings=[(r_name):asc_first, (r2):asc_first]) - JOIN(condition=t0.r_name != t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r2': t1.r_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_name != t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r2': t1.r_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_4.txt b/tests/test_plan_refsols/simple_cross_4.txt index 0dd20b88a..59617616d 100644 --- a/tests/test_plan_refsols/simple_cross_4.txt +++ b/tests/test_plan_refsols/simple_cross_4.txt @@ -2,6 +2,6 @@ ROOT(columns=[('region_name', r_name), ('n_other_regions', DEFAULT_TO(n_rows, 0: JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': 
t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.r_name != t0.r_name & SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_regionkey': t0.r_regionkey}) + JOIN(condition=t1.r_name != t0.r_name & SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_6.txt b/tests/test_plan_refsols/simple_cross_6.txt index 20c725da5..10bb19f6e 100644 --- a/tests/test_plan_refsols/simple_cross_6.txt +++ b/tests/test_plan_refsols/simple_cross_6.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_pairs', n_pairs)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_pairs': COUNT()}) - JOIN(condition=t1.o_custkey == t0.o_custkey & t1.o_orderdate == t0.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t1.o_custkey == t0.o_custkey & t1.o_orderdate == t0.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) FILTER(condition=INTEGER(SLICE(o_clerk, 6:numeric, None:unknown, None:unknown)) >= 900:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=INTEGER(SLICE(o_clerk, 6:numeric, 
None:unknown, None:unknown)) >= 900:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/simple_cross_7.txt b/tests/test_plan_refsols/simple_cross_7.txt index 78f1f71f4..69800f5c2 100644 --- a/tests/test_plan_refsols/simple_cross_7.txt +++ b/tests/test_plan_refsols/simple_cross_7.txt @@ -3,7 +3,7 @@ ROOT(columns=[('original_order_key', o_orderkey), ('n_other_orders', DEFAULT_TO( FILTER(condition=o_orderstatus == 'P':string, columns={'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_orderkey': t0.o_orderkey}) FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/simple_cross_8.txt b/tests/test_plan_refsols/simple_cross_8.txt index b40fe6b0e..80804edb6 100644 --- a/tests/test_plan_refsols/simple_cross_8.txt +++ b/tests/test_plan_refsols/simple_cross_8.txt @@ -4,8 +4,8 @@ ROOT(columns=[('supplier_region', anything_supplier_region), ('customer_region', JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'key_2': t0.key_2, 'name_18': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t0.key_2, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t1.r_regionkey, 'r_name': t1.r_name, 'r_regionkey': t0.r_regionkey, 
'supplier_region': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_9.txt b/tests/test_plan_refsols/simple_cross_9.txt index 7a1abb072..715af5d11 100644 --- a/tests/test_plan_refsols/simple_cross_9.txt +++ b/tests/test_plan_refsols/simple_cross_9.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n1', n_name), ('n2', n2)], orderings=[(n_name):asc_first, (n2):asc_first], limit=10:numeric) - JOIN(condition=t0.n_name != t1.n_name & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n2': t1.n_name, 'n_name': t0.n_name}) - JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.n_name != t1.n_name & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n2': t1.n_name, 'n_name': t0.n_name}) + JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index f81eb8d36..b9a9772c7 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -5,7 +5,7 @@ 
ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) + JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 601a5a746..1f9d454fc 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -1,5 +1,5 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - 
PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_expr_4, 0:numeric) - PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', DEFAULT_TO(sum_n_rows, 0:numeric)), ('incidents', DEFAULT_TO(sum_expr_4, 0:numeric))], orderings=[(year - YEAR(release_date)):asc_first]) - JOIN(condition=YEAR(t0.release_date) <= t1.year_1, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year_1}) + JOIN(condition=YEAR(t0.release_date) <= t1.year_1, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year_1}) AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) @@ -10,7 +10,7 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROU AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) @@ -18,7 +18,7 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROU SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index b1f19bec2..fd04ea420 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ 
b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -5,10 +5,10 @@ ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/tpch_q19.txt b/tests/test_plan_refsols/tpch_q19.txt index c3a94e9b4..447f85a3c 100644 --- a/tests/test_plan_refsols/tpch_q19.txt +++ b/tests/test_plan_refsols/tpch_q19.txt @@ -1,6 +1,6 @@ ROOT(columns=[('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_partkey == t1.p_partkey & MONOTONIC(1:numeric, 
t1.p_size, 5:numeric) & MONOTONIC(1:numeric, t0.l_quantity, 11:numeric) & ISIN(t1.p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & t1.p_brand == 'Brand#12':string | MONOTONIC(1:numeric, t1.p_size, 10:numeric) & MONOTONIC(10:numeric, t0.l_quantity, 20:numeric) & ISIN(t1.p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & t1.p_brand == 'Brand#23':string | MONOTONIC(1:numeric, t1.p_size, 15:numeric) & MONOTONIC(20:numeric, t0.l_quantity, 30:numeric) & ISIN(t1.p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & t1.p_brand == 'Brand#34':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey & MONOTONIC(1:numeric, t1.p_size, 5:numeric) & MONOTONIC(1:numeric, t0.l_quantity, 11:numeric) & ISIN(t1.p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & t1.p_brand == 'Brand#12':string | MONOTONIC(1:numeric, t1.p_size, 10:numeric) & MONOTONIC(10:numeric, t0.l_quantity, 20:numeric) & ISIN(t1.p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & t1.p_brand == 'Brand#23':string | MONOTONIC(1:numeric, t1.p_size, 15:numeric) & MONOTONIC(20:numeric, t0.l_quantity, 30:numeric) & ISIN(t1.p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & t1.p_brand == 'Brand#34':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice}) FILTER(condition=l_shipinstruct == 'DELIVER IN PERSON':string & ISIN(l_shipmode, ['AIR', 'AIR REG']:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 
'l_shipinstruct': l_shipinstruct, 'l_shipmode': l_shipmode}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 05eafcbc8..8e0003f34 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -5,7 +5,7 @@ ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], o FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_l_suppkey': t0.anything_l_suppkey}) + JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t0.anything_l_suppkey}) FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_suppkey': anything_l_suppkey, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) diff --git 
a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index dbca0c73d..5c7637dd0 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,8 +1,8 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) AGGREGATE(keys={'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) - JOIN(condition=t1.c_acctbal > t0.global_avg_balance, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) + JOIN(condition=t1.c_acctbal > t0.global_avg_balance, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/tpch_q3.txt b/tests/test_plan_refsols/tpch_q3.txt index 0a7c9f97d..d7be30887 100644 --- a/tests/test_plan_refsols/tpch_q3.txt +++ 
b/tests/test_plan_refsols/tpch_q3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('O_ORDERDATE', o_orderdate), ('O_SHIPPRIORITY', o_shippriority)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first], limit=10:numeric) AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/tpch_q7.txt b/tests/test_plan_refsols/tpch_q7.txt index 813d42b01..2703970fa 100644 --- a/tests/test_plan_refsols/tpch_q7.txt +++ b/tests/test_plan_refsols/tpch_q7.txt @@ -1,7 +1,7 @@ ROOT(columns=[('SUPP_NATION', supp_nation), ('CUST_NATION', n_name), ('L_YEAR', 
l_year), ('REVENUE', DEFAULT_TO(sum_volume, 0:numeric))], orderings=[(supp_nation):asc_first, (n_name):asc_first, (l_year):asc_first]) AGGREGATE(keys={'l_year': YEAR(l_shipdate), 'n_name': n_name, 'supp_nation': supp_nation}, aggregations={'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey & t0.n_name == 'FRANCE':string & t1.n_name == 'GERMANY':string | t0.n_name == 'GERMANY':string & t1.n_name == 'FRANCE':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supp_nation': t0.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey & t0.n_name == 'FRANCE':string & t1.n_name == 'GERMANY':string | t0.n_name == 'GERMANY':string & t1.n_name == 'FRANCE':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supp_nation': t0.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 
'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q8.txt b/tests/test_plan_refsols/tpch_q8.txt index f27de3c5c..f54d41956 100644 --- a/tests/test_plan_refsols/tpch_q8.txt +++ b/tests/test_plan_refsols/tpch_q8.txt @@ -1,12 +1,12 @@ ROOT(columns=[('O_YEAR', O_YEAR), ('MKT_SHARE', DEFAULT_TO(sum_brazil_volume, 0:numeric) / DEFAULT_TO(sum_volume, 0:numeric))], orderings=[]) AGGREGATE(keys={'O_YEAR': YEAR(o_orderdate)}, aggregations={'sum_brazil_volume': SUM(IFF(n_name == 'BRAZIL':string, l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': t0.o_orderdate}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': 
l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) FILTER(condition=p_type == 'ECONOMY ANODIZED STEEL':string, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) FILTER(condition=ISIN(YEAR(o_orderdate), [1995, 1996]:array[unknown]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsize.txt b/tests/test_plan_refsols/window_sliding_frame_relsize.txt index c3c89419b..63846c131 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsize.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsize.txt @@ -1,4 +1,2 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w2', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w3', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w6', RELSIZE(args=[], 
partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w7', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))), ('w8', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) - SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsum.txt b/tests/test_plan_refsols/window_sliding_frame_relsum.txt index dbbba0f08..967cacd49 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsum.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsum.txt @@ -1,4 +1,2 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w2', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w3', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w6', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w7', RELSUM(args=[sbTxShares], 
partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))), ('w8', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) - SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) diff --git a/tests/test_pydough_from_string.py b/tests/test_pydough_from_string.py index 9919c38f3..1867b4f29 100644 --- a/tests/test_pydough_from_string.py +++ b/tests/test_pydough_from_string.py @@ -17,6 +17,7 @@ from tests.testing_utilities import graph_fetcher +@pytest.mark.execute @pytest.mark.parametrize( "pydough_code, answer_variable, env, answer", [ diff --git a/tests/test_sql_refsols/correl_36_sqlite.sql b/tests/test_sql_refsols/correl_36_sqlite.sql index 22d824e86..01635b227 100644 --- a/tests/test_sql_refsols/correl_36_sqlite.sql +++ b/tests/test_sql_refsols/correl_36_sqlite.sql @@ -1,16 +1,16 @@ -WITH _s3 AS ( +WITH _s1 AS ( SELECT p_partkey, p_type FROM tpch.part -), _s21 AS ( +), _t0 AS ( SELECT DISTINCT orders.o_orderkey AS key_12, lineitem.l_linenumber, lineitem.l_orderkey FROM tpch.lineitem AS lineitem - JOIN _s3 AS _s3 - ON _s3.p_partkey = lineitem.l_partkey + JOIN _s1 AS _s1 + ON _s1.p_partkey = lineitem.l_partkey JOIN tpch.supplier AS supplier ON lineitem.l_suppkey = supplier.s_suppkey JOIN tpch.orders AS orders @@ -44,20 +44,11 @@ WITH _s3 AS ( END = 1 AND CAST(STRFTIME('%Y', lineitem_2.l_shipdate) 
AS INTEGER) = 1997 AND lineitem_2.l_orderkey = orders_2.o_orderkey - JOIN _s3 AS _s19 - ON _s19.p_partkey = lineitem_2.l_partkey AND _s19.p_type = _s3.p_type + JOIN _s1 AS _s17 + ON _s1.p_type = _s17.p_type AND _s17.p_partkey = lineitem_2.l_partkey WHERE CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1998 ) SELECT COUNT(*) AS n -FROM tpch.lineitem AS lineitem -JOIN tpch.orders AS orders - ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1998 - AND lineitem.l_orderkey = orders.o_orderkey -JOIN _s21 AS _s21 - ON _s21.key_12 = orders.o_orderkey - AND _s21.l_linenumber = lineitem.l_linenumber - AND _s21.l_orderkey = lineitem.l_orderkey -WHERE - CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1998 +FROM _t0 diff --git a/tests/test_sql_refsols/quantile_test_1_ansi.sql b/tests/test_sql_refsols/quantile_test_1_ansi.sql index aa50858da..f06e8e049 100644 --- a/tests/test_sql_refsols/quantile_test_1_ansi.sql +++ b/tests/test_sql_refsols/quantile_test_1_ansi.sql @@ -1,7 +1,6 @@ SELECT PERCENTILE_DISC(0.7) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS seventieth_order_price -FROM tpch.customer AS customer -JOIN tpch.orders AS orders - ON EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) = 1998 - AND customer.c_custkey = orders.o_custkey + o_totalprice NULLS LAST) AS seventieth_order_price +FROM tpch.orders +WHERE + EXTRACT(YEAR FROM CAST(o_orderdate AS DATETIME)) = 1998 diff --git a/tests/test_sql_refsols/quantile_test_1_mysql.sql b/tests/test_sql_refsols/quantile_test_1_mysql.sql index 015388656..421c09607 100644 --- a/tests/test_sql_refsols/quantile_test_1_mysql.sql +++ b/tests/test_sql_refsols/quantile_test_1_mysql.sql @@ -1,14 +1,13 @@ WITH _t0 AS ( SELECT CASE - WHEN TRUNCATE(CAST(0.30000000000000004 * COUNT(ORDERS.o_totalprice) OVER () AS FLOAT), 0) < ROW_NUMBER() OVER (ORDER BY ORDERS.o_totalprice DESC) - THEN ORDERS.o_totalprice + WHEN TRUNCATE(CAST(0.30000000000000004 * COUNT(o_totalprice) OVER () AS FLOAT), 0) < ROW_NUMBER() 
OVER (ORDER BY o_totalprice DESC) + THEN o_totalprice ELSE NULL END AS expr_1 - FROM tpch.CUSTOMER AS CUSTOMER - JOIN tpch.ORDERS AS ORDERS - ON CUSTOMER.c_custkey = ORDERS.o_custkey - AND EXTRACT(YEAR FROM CAST(ORDERS.o_orderdate AS DATETIME)) = 1998 + FROM tpch.ORDERS + WHERE + EXTRACT(YEAR FROM CAST(o_orderdate AS DATETIME)) = 1998 ) SELECT MAX(expr_1) AS seventieth_order_price diff --git a/tests/test_sql_refsols/quantile_test_1_sqlite.sql b/tests/test_sql_refsols/quantile_test_1_sqlite.sql index 01fc46e7c..39fab677f 100644 --- a/tests/test_sql_refsols/quantile_test_1_sqlite.sql +++ b/tests/test_sql_refsols/quantile_test_1_sqlite.sql @@ -1,14 +1,13 @@ WITH _t0 AS ( SELECT CASE - WHEN CAST(0.30000000000000004 * COUNT(orders.o_totalprice) OVER () AS INTEGER) < ROW_NUMBER() OVER (ORDER BY orders.o_totalprice DESC) - THEN orders.o_totalprice + WHEN CAST(0.30000000000000004 * COUNT(o_totalprice) OVER () AS INTEGER) < ROW_NUMBER() OVER (ORDER BY o_totalprice DESC) + THEN o_totalprice ELSE NULL END AS expr_1 - FROM tpch.customer AS customer - JOIN tpch.orders AS orders - ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1998 - AND customer.c_custkey = orders.o_custkey + FROM tpch.orders + WHERE + CAST(STRFTIME('%Y', o_orderdate) AS INTEGER) = 1998 ) SELECT MAX(expr_1) AS seventieth_order_price diff --git a/tests/test_sql_refsols/window_functions_ansi.sql b/tests/test_sql_refsols/window_functions_ansi.sql index 4b1b37292..10721b5f4 100644 --- a/tests/test_sql_refsols/window_functions_ansi.sql +++ b/tests/test_sql_refsols/window_functions_ansi.sql @@ -8,8 +8,6 @@ SELECT customer.c_acctbal / AVG(customer.c_acctbal) OVER (ORDER BY customer.c_acctbal NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS relavg_value, customer.c_acctbal / COUNT(CASE WHEN customer.c_acctbal > 0.0 THEN customer.c_acctbal ELSE NULL END) OVER () AS relcount_value, customer.c_acctbal / COUNT(*) OVER () AS relsize_value -FROM tpch.region AS region -JOIN tpch.nation AS nation - ON 
nation.n_regionkey = region.r_regionkey +FROM tpch.nation AS nation JOIN tpch.customer AS customer ON customer.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/window_functions_mysql.sql b/tests/test_sql_refsols/window_functions_mysql.sql index 62661daa7..f51f418ee 100644 --- a/tests/test_sql_refsols/window_functions_mysql.sql +++ b/tests/test_sql_refsols/window_functions_mysql.sql @@ -8,8 +8,6 @@ SELECT CUSTOMER.c_acctbal / AVG(CUSTOMER.c_acctbal) OVER (ORDER BY CUSTOMER.c_acctbal ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS relavg_value, CUSTOMER.c_acctbal / COUNT(CASE WHEN CUSTOMER.c_acctbal > 0.0 THEN CUSTOMER.c_acctbal ELSE NULL END) OVER () AS relcount_value, CUSTOMER.c_acctbal / COUNT(*) OVER () AS relsize_value -FROM tpch.REGION AS REGION -JOIN tpch.NATION AS NATION - ON NATION.n_regionkey = REGION.r_regionkey +FROM tpch.NATION AS NATION JOIN tpch.CUSTOMER AS CUSTOMER ON CUSTOMER.c_nationkey = NATION.n_nationkey diff --git a/tests/test_sql_refsols/window_functions_sqlite.sql b/tests/test_sql_refsols/window_functions_sqlite.sql index 7c648b070..c60adccfd 100644 --- a/tests/test_sql_refsols/window_functions_sqlite.sql +++ b/tests/test_sql_refsols/window_functions_sqlite.sql @@ -8,8 +8,6 @@ SELECT CAST(customer.c_acctbal AS REAL) / AVG(customer.c_acctbal) OVER (ORDER BY customer.c_acctbal ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS relavg_value, CAST(customer.c_acctbal AS REAL) / COUNT(CASE WHEN customer.c_acctbal > 0.0 THEN customer.c_acctbal ELSE NULL END) OVER () AS relcount_value, CAST(customer.c_acctbal AS REAL) / COUNT(*) OVER () AS relsize_value -FROM tpch.region AS region -JOIN tpch.nation AS nation - ON nation.n_regionkey = region.r_regionkey +FROM tpch.nation AS nation JOIN tpch.customer AS customer ON customer.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/window_sliding_frame_relsize_ansi.sql b/tests/test_sql_refsols/window_sliding_frame_relsize_ansi.sql index 093cb0ab5..4c8a3bb8b 100644 --- 
a/tests/test_sql_refsols/window_sliding_frame_relsize_ansi.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsize_ansi.sql @@ -1,16 +1,14 @@ SELECT - sbtransaction.sbtxid AS transaction_id, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 -FROM main.sbcustomer AS sbcustomer -JOIN main.sbtransaction AS sbtransaction - ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + sbtxid AS transaction_id, + COUNT(*) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, + COUNT(*) OVER (ORDER BY 
sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + COUNT(*) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, + COUNT(*) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 +FROM main.sbtransaction ORDER BY - sbtransaction.sbtxdatetime + sbtxdatetime LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsize_mysql.sql b/tests/test_sql_refsols/window_sliding_frame_relsize_mysql.sql index e3ecc2587..7fb2f5a2f 100644 --- a/tests/test_sql_refsols/window_sliding_frame_relsize_mysql.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsize_mysql.sql @@ -1,16 +1,14 @@ SELECT - sbTransaction.sbtxid AS transaction_id, - COUNT(*) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, - COUNT(*) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, - COUNT(*) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, - COUNT(*) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - COUNT(*) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE 
utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, - COUNT(*) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, - COUNT(*) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, - COUNT(*) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 -FROM main.sbCustomer AS sbCustomer -JOIN main.sbTransaction AS sbTransaction - ON sbCustomer.sbcustid = sbTransaction.sbtxcustid + sbtxid AS transaction_id, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 +FROM main.sbTransaction ORDER BY - sbTransaction.sbtxdatetime + sbtxdatetime LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsize_sqlite.sql 
b/tests/test_sql_refsols/window_sliding_frame_relsize_sqlite.sql index 57e3d18ed..a7babc7d7 100644 --- a/tests/test_sql_refsols/window_sliding_frame_relsize_sqlite.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsize_sqlite.sql @@ -1,16 +1,14 @@ SELECT - sbtransaction.sbtxid AS transaction_id, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 -FROM main.sbcustomer AS sbcustomer -JOIN main.sbtransaction AS sbtransaction - ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + sbtxid AS transaction_id, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + COUNT(*) OVER (PARTITION BY sbtxcustid 
ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 +FROM main.sbtransaction ORDER BY - sbtransaction.sbtxdatetime + sbtxdatetime LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsum_ansi.sql b/tests/test_sql_refsols/window_sliding_frame_relsum_ansi.sql index 1c4e4da48..2db332255 100644 --- a/tests/test_sql_refsols/window_sliding_frame_relsum_ansi.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsum_ansi.sql @@ -1,16 +1,14 @@ SELECT - sbtransaction.sbtxid AS transaction_id, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid 
ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 -FROM main.sbcustomer AS sbcustomer -JOIN main.sbtransaction AS sbtransaction - ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + sbtxid AS transaction_id, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 +FROM main.sbtransaction ORDER BY - sbtransaction.sbtxdatetime + sbtxdatetime LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsum_mysql.sql 
b/tests/test_sql_refsols/window_sliding_frame_relsum_mysql.sql index ff2f29e69..e6237d92e 100644 --- a/tests/test_sql_refsols/window_sliding_frame_relsum_mysql.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsum_mysql.sql @@ -1,16 +1,14 @@ SELECT - sbTransaction.sbtxid AS transaction_id, - SUM(sbTransaction.sbtxshares) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, - SUM(sbTransaction.sbtxshares) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, - SUM(sbTransaction.sbtxshares) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, - SUM(sbTransaction.sbtxshares) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - SUM(sbTransaction.sbtxshares) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, - SUM(sbTransaction.sbtxshares) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, - SUM(sbTransaction.sbtxshares) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, - SUM(sbTransaction.sbtxshares) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 -FROM main.sbCustomer AS sbCustomer -JOIN main.sbTransaction AS sbTransaction - ON sbCustomer.sbcustid = sbTransaction.sbtxcustid + sbtxid AS transaction_id, + SUM(sbtxshares) OVER (ORDER BY 
sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 +FROM main.sbTransaction ORDER BY - sbTransaction.sbtxdatetime + sbtxdatetime LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsum_sqlite.sql b/tests/test_sql_refsols/window_sliding_frame_relsum_sqlite.sql index af2442b96..ed9b5c93b 100644 --- a/tests/test_sql_refsols/window_sliding_frame_relsum_sqlite.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsum_sqlite.sql @@ -1,16 +1,14 @@ SELECT - sbtransaction.sbtxid AS transaction_id, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND 
UNBOUNDED FOLLOWING) AS w3, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 -FROM main.sbcustomer AS sbcustomer -JOIN main.sbtransaction AS sbtransaction - ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + sbtxid AS transaction_id, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 
+FROM main.sbtransaction ORDER BY - sbtransaction.sbtxdatetime + sbtxdatetime LIMIT 8 From 8f7fbbe29e03a85705cdd7534532321599bd0638 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 25 Aug 2025 16:01:07 -0400 Subject: [PATCH 089/143] Fixing bug [RUN CI] --- pydough/conversion/filter_pushdown.py | 1 - pydough/conversion/relational_converter.py | 25 +++++++++++++------ .../access_partition_child_after_filter.txt | 6 ++++- .../month_year_sliding_windows.txt | 14 ++++++++--- .../multi_partition_access_3.txt | 9 ++++--- .../test_plan_refsols/rank_with_filters_c.txt | 2 +- 6 files changed, 40 insertions(+), 17 deletions(-) diff --git a/pydough/conversion/filter_pushdown.py b/pydough/conversion/filter_pushdown.py index e247bd132..b77267bc7 100644 --- a/pydough/conversion/filter_pushdown.py +++ b/pydough/conversion/filter_pushdown.py @@ -248,7 +248,6 @@ def visit_join(self, join: Join) -> RelationalNode: cardinality = join.cardinality.add_filter() else: reverse_cardinality = reverse_cardinality.add_filter() - # Do the same pushable_filters = { expr.accept_shuttle(transposer) for expr in pushable_filters } diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index a751bfba4..25f47acba 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1081,6 +1081,7 @@ def translate_partition_child( self, node: HybridPartitionChild, context: TranslationOutput | None, + preceding_hybrid: HybridTree | None, ) -> TranslationOutput: """ Converts a step into the child of a PARTITION node into a join between @@ -1092,6 +1093,8 @@ def translate_partition_child( `context`: the data structure storing information used by the conversion, such as bindings of already translated terms from preceding contexts. + `preceding_hybrid`: the previous layer in the hybrid tree above the + current level. 
Returns: The TranslationOutput payload containing expressions for both the @@ -1127,7 +1130,9 @@ def translate_partition_child( child_output, JoinType.INNER, JoinCardinality.PLURAL_FILTER, - JoinCardinality.SINGULAR_ACCESS, + JoinCardinality.SINGULAR_ACCESS + if preceding_hybrid is not None and preceding_hybrid.always_exists() + else JoinCardinality.SINGULAR_FILTER, join_keys, None, None, @@ -1305,7 +1310,11 @@ def rel_translation( else: result = self.build_simple_table_scan(operation) case HybridPartitionChild(): - result = self.translate_partition_child(operation, context) + result = self.translate_partition_child( + operation, + context, + preceding_hybrid[0] if preceding_hybrid is not None else None, + ) case HybridCalculate(): assert context is not None, "Malformed HybridTree pattern." result = self.translate_calculate(operation, context) @@ -1473,12 +1482,14 @@ def optimize_relational_tree( The optimized relational root. """ + pruner: ColumnPruner = ColumnPruner() + # Step 0: prune unused columns. This is done early to remove as many dead # names as possible so that steps that require generating column names can # use nicer names instead of generating nastier ones to avoid collisions. # It also speeds up all subsequent steps by reducing the total number of # objects inside the plan. - root = ColumnPruner().prune_unused_columns(root) + root = pruner.prune_unused_columns(root) # Step 1: push filters down as far as possible root = confirm_root(push_filters(root)) @@ -1500,10 +1511,10 @@ def optimize_relational_tree( root = confirm_root(merge_projects(root)) # Step 6: re-run column pruning after the various steps, which may have - # rendered more columns unused. This is done befre the next step to remove + # rendered more columns unused. This is done before the next step to remove # as many column names as possible so the column bubbling step can try to # use nicer names without worrying about collisions. 
- root = ColumnPruner().prune_unused_columns(root) + root = pruner.prune_unused_columns(root) # Step 7: bubble up names from the leaf nodes to further encourage simpler # naming without aliases, and also to delete duplicate columns where @@ -1524,7 +1535,7 @@ def optimize_relational_tree( root = confirm_root(pullup_projections(root)) simplify_expressions(root, additional_shuttles) root = confirm_root(push_filters(root)) - root = ColumnPruner().prune_unused_columns(root) + root = pruner.prune_unused_columns(root) # Step 9: re-run projection merging, without pushing into joins. This # will allow some redundant projections created by pullup to be removed @@ -1538,7 +1549,7 @@ def optimize_relational_tree( # Step 11: re-run column pruning one last time to remove any columns that # are no longer used after the final round of transformations. - root = ColumnPruner().prune_unused_columns(root) + root = pruner.prune_unused_columns(root) return root diff --git a/tests/test_plan_refsols/access_partition_child_after_filter.txt b/tests/test_plan_refsols/access_partition_child_after_filter.txt index e0889ae05..baeffc577 100644 --- a/tests/test_plan_refsols/access_partition_child_after_filter.txt +++ b/tests/test_plan_refsols/access_partition_child_after_filter.txt @@ -1,2 +1,6 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price', p_retailprice)], orderings=[]) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) + JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) + FILTER(condition=avg_p_retailprice > 27.5:numeric, columns={'p_type': p_type}) + AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) + SCAN(table=tpch.PART, 
columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index b8ba05d61..d91f3ab37 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,5 +1,11 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) - FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_1):asc_last, (month_1):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_1):asc_last, (month_1):asc_last], default=0.0), columns={'month': month_1, 'year': year_1}) - AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year_1': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) + JOIN(condition=t0.year == t1.year_1, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year_1}) + FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) + 
PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) + AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year_1': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index e2cf74fb0..0274e95db 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,9 +1,12 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'type_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t0.sbTickerType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) - 
SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) + AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) + SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) + SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'ticker_high_price': MAX(sbDpClose)}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) diff --git a/tests/test_plan_refsols/rank_with_filters_c.txt b/tests/test_plan_refsols/rank_with_filters_c.txt index f985be11c..f778ba043 100644 --- a/tests/test_plan_refsols/rank_with_filters_c.txt +++ b/tests/test_plan_refsols/rank_with_filters_c.txt @@ -1,6 +1,6 @@ ROOT(columns=[('pname', p_name), ('psize', size_3)], orderings=[]) FILTER(condition=RANKING(args=[], partition=[p_size], order=[(p_retailprice):desc_first]) == 1:numeric, columns={'p_name': p_name, 'size_3': size_3}) - JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_size': 
t0.p_size, 'size_3': t1.p_size}) + JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_size': t0.p_size, 'size_3': t1.p_size}) LIMIT(limit=5:numeric, columns={'p_size': p_size}, orderings=[(p_size):desc_last]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) SCAN(table=tpch.PART, columns={'p_size': p_size}) From d797c439b4a5b4cfef381d4cfaed5d1666dd4b5c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 26 Aug 2025 10:54:52 -0400 Subject: [PATCH 090/143] Adjusting aggregation splitting to account for reverse cardinality [RUN CI] [RUN MYSQL] --- pydough/conversion/agg_split.py | 1 + tests/test_plan_refsols/common_prefix_af.txt | 16 ++++---- tests/test_plan_refsols/common_prefix_o.txt | 31 +++++++------- tests/test_plan_refsols/correl_14.txt | 8 ++-- tests/test_plan_refsols/correl_15.txt | 8 ++-- tests/test_plan_refsols/correl_35.txt | 22 +++++----- .../count_cust_supplier_nation_combos.txt | 32 +++++++-------- .../customers_sum_line_price.txt | 11 +++-- .../multi_partition_access_2.txt | 30 +++++++------- .../nations_sum_line_price.txt | 11 +++-- .../regions_sum_line_price.txt | 11 +++-- tests/test_plan_refsols/triple_partition.txt | 33 ++++++++------- tests/test_sql_refsols/correl_14_sqlite.sql | 11 ++--- tests/test_sql_refsols/correl_15_sqlite.sql | 11 ++--- tests/test_sql_refsols/correl_35_sqlite.sql | 40 +++++-------------- .../defog_broker_adv14_ansi.sql | 18 +++++++-- .../defog_broker_adv14_mysql.sql | 18 +++++++-- .../defog_broker_adv14_sqlite.sql | 22 +++++++--- 18 files changed, 165 insertions(+), 169 deletions(-) diff --git a/pydough/conversion/agg_split.py b/pydough/conversion/agg_split.py index bcbf82cf6..e4081861f 100644 --- a/pydough/conversion/agg_split.py +++ b/pydough/conversion/agg_split.py @@ -322,6 +322,7 @@ def attempt_join_aggregate_transpose( # if joining first will reduce the number of rows that get 
aggregated. if join.cardinality.filters: can_push_left = False + if join.reverse_cardinality.filters: can_push_right = False # If any of the aggregations to either side cannot be pushed down, then diff --git a/tests/test_plan_refsols/common_prefix_af.txt b/tests/test_plan_refsols/common_prefix_af.txt index e2a896fca..ce89a069c 100644 --- a/tests/test_plan_refsols/common_prefix_af.txt +++ b/tests/test_plan_refsols/common_prefix_af.txt @@ -1,15 +1,15 @@ -ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name', max_c_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_c_name': t1.max_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) +ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name', max_anything_c_name)], orderings=[(n_name):asc_first]) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_anything_c_name': t1.max_anything_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'max_c_name': max_c_name, 'n_rows': n_rows}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_name': MAX(c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, 
columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'max_anything_c_name': max_anything_c_name, 'n_rows': n_rows}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_anything_c_name': MAX(anything_c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'anything_c_name': t1.anything_c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'n_rows': t0.n_rows, 'o_custkey': t0.o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'n_rows': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) FILTER(condition=ISIN(o_orderkey, [1070368, 1347104, 1472135, 2351457]:array[unknown]), columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 199fb1393..b4603c210 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,21 +1,20 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_n_rows, 
0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_small_parts': sum_sum_agg_5, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_n_rows': sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'n_small_parts': sum_agg_5, 'ndistinct_n_name': ndistinct_n_name, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t1.sum_agg_5, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'sum_agg_5': sum_agg_5, 'sum_p_retailprice': sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index b8d16913a..5341118f8 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': 
t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'supplier_avg_price': AVG(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 
d821dbbe7..0795bc81e 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,14 +1,14 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.global_avg_price * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t1.p_retailprice < t0.global_avg_price * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 
'supplier_avg_price': t1.supplier_avg_price}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'supplier_avg_price': AVG(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_35.txt b/tests/test_plan_refsols/correl_35.txt index 96ec04e08..9b491ce4d 100644 --- a/tests/test_plan_refsols/correl_35.txt +++ b/tests/test_plan_refsols/correl_35.txt @@ -10,15 +10,13 @@ ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 
'o_orderpriority': o_orderpriority, 'p_type': p_type}) - AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) - AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=QUARTER(l_shipdate) == 1:numeric & YEAR(l_shipdate) == 1997:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': 
o_orderpriority, 'p_type': p_type}, aggregations={}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=QUARTER(l_shipdate) == 1:numeric & YEAR(l_shipdate) == 1997:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index 3c1b317ec..0b0bbd46e 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -1,19 +1,15 @@ -ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', 
sum_sum_sum_sum_agg_0), ('total_value', DEFAULT_TO(sum_sum_sum_sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year': year}, aggregations={'sum_sum_sum_sum_agg_0': SUM(sum_sum_sum_agg_0), 'sum_sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'sum_sum_sum_agg_0': t0.sum_sum_sum_agg_0, 'sum_sum_sum_sum_l_extendedprice': t0.sum_sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year': t0.year}) - AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year': year}, aggregations={'sum_sum_sum_agg_0': SUM(sum_sum_agg_0), 'sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': COUNT(), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, 
columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) +ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', n_rows), ('total_value', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT(), 'sum_l_extendedprice': SUM(l_extendedprice)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'supplier_nation': t1.n_name}) + JOIN(condition=t0.ps_suppkey 
== t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_suppkey': t1.ps_suppkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/customers_sum_line_price.txt 
b/tests/test_plan_refsols/customers_sum_line_price.txt index 082a9e3e7..799b8abe5 100644 --- a/tests/test_plan_refsols/customers_sum_line_price.txt +++ b/tests/test_plan_refsols/customers_sum_line_price.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('okey', c_custkey), ('lsum', DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) +ROOT(columns=[('okey', c_custkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt 
b/tests/test_plan_refsols/multi_partition_access_2.txt index 95593d4d1..cad1d95b5 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,28 +1,30 @@ -ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', cus_tick_typ_avg_shares), ('cust_tick_avg_shares', cust_tick_avg_shares), ('cust_avg_shares', cust_avg_shares)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) - JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) +ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', cus_tick_typ_avg_shares), ('cust_tick_avg_shares', sum_sum_sbTxShares_1 / sum_count_sbTxShares_1), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': 
t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares_1 / t0.sum_count_sbTxShares_1 & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t1.sum_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': 
t1.sum_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) + JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_sbTxShares': t1.sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': 
sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'cus_tick_typ_avg_shares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/nations_sum_line_price.txt b/tests/test_plan_refsols/nations_sum_line_price.txt index 7ec5765c3..6c3ad7c6f 100644 --- a/tests/test_plan_refsols/nations_sum_line_price.txt +++ b/tests/test_plan_refsols/nations_sum_line_price.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('okey', n_nationkey), ('lsum', DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) +ROOT(columns=[('okey', n_nationkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_l_extendedprice': 
t1.sum_l_extendedprice}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'l_extendedprice': t1.l_extendedprice}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/regions_sum_line_price.txt b/tests/test_plan_refsols/regions_sum_line_price.txt index a8f014f55..67a0c6401 100644 --- a/tests/test_plan_refsols/regions_sum_line_price.txt +++ b/tests/test_plan_refsols/regions_sum_line_price.txt @@ -1,12 +1,11 @@ -ROOT(columns=[('okey', r_regionkey), ('lsum', DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'r_regionkey': t0.r_regionkey, 'sum_sum_l_extendedprice': 
t1.sum_sum_l_extendedprice}) +ROOT(columns=[('okey', r_regionkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'r_regionkey': t0.r_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_regionkey': t0.n_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': 
l_orderkey}) diff --git a/tests/test_plan_refsols/triple_partition.txt b/tests/test_plan_refsols/triple_partition.txt index 067b2808d..f59dd133f 100644 --- a/tests/test_plan_refsols/triple_partition.txt +++ b/tests/test_plan_refsols/triple_partition.txt @@ -1,23 +1,22 @@ ROOT(columns=[('region', supp_region), ('avgpct', avg_percentage)], orderings=[(supp_region):asc_first]) AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(100.0:numeric * max_n_instances / sum_n_instances)}) AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) - AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': SUM(n_instances)}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_instances': t0.n_instances, 'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) - AGGREGATE(keys={'o_custkey': o_custkey, 'p_type': p_type, 'r_name': r_name}, aggregations={'n_instances': COUNT()}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) - 
FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) + FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git 
a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index 6e964f99b..2bf406ddb 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,13 +1,12 @@ WITH _s3 AS ( SELECT - SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr_1, - SUM(part.p_retailprice) AS sum_p_retailprice, + AVG(part.p_retailprice) AS supplier_avg_price, partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN tpch.part AS part ON part.p_partkey = partsupp.ps_partkey GROUP BY - 3 + 2 ) SELECT COUNT(DISTINCT supplier.s_suppkey) AS n @@ -17,11 +16,9 @@ JOIN _s3 AS _s3 JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN tpch.part AS part - ON part.p_container = 'LG DRUM' + ON _s3.supplier_avg_price > part.p_retailprice + AND part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey - AND part.p_retailprice < ( - CAST(_s3.sum_p_retailprice AS REAL) / _s3.sum_expr_1 - ) AND part.p_retailprice < ( partsupp.ps_supplycost * 1.5 ) diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index f59429df8..668627c41 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -4,14 +4,13 @@ WITH _s0 AS ( FROM tpch.part ), _s5 AS ( SELECT - SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr_1, - SUM(part.p_retailprice) AS sum_p_retailprice, + AVG(part.p_retailprice) AS supplier_avg_price, partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN tpch.part AS part ON part.p_partkey = partsupp.ps_partkey GROUP BY - 3 + 2 ) SELECT COUNT(DISTINCT supplier.s_suppkey) AS n @@ -23,11 +22,9 @@ JOIN _s5 AS _s5 JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN tpch.part AS part - ON part.p_container = 'LG DRUM' + ON _s5.supplier_avg_price > part.p_retailprice + AND part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey - AND 
part.p_retailprice < ( - CAST(_s5.sum_p_retailprice AS REAL) / _s5.sum_expr_1 - ) AND part.p_retailprice < ( _s0.global_avg_price * 0.85 ) diff --git a/tests/test_sql_refsols/correl_35_sqlite.sql b/tests/test_sql_refsols/correl_35_sqlite.sql index b1750bbd1..cfa10bec4 100644 --- a/tests/test_sql_refsols/correl_35_sqlite.sql +++ b/tests/test_sql_refsols/correl_35_sqlite.sql @@ -3,13 +3,12 @@ WITH _s1 AS ( p_partkey, p_type FROM tpch.part -), _s10 AS ( - SELECT - COUNT(*) AS n_rows, +), _s13 AS ( + SELECT DISTINCT customer.c_custkey, customer.c_nationkey, - lineitem.l_partkey, - orders.o_orderpriority + orders.o_orderpriority, + _s11.p_type FROM tpch.customer AS customer JOIN tpch.orders AS orders ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1997 @@ -31,26 +30,8 @@ WITH _s1 AS ( END = 1 AND CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1997 AND lineitem.l_orderkey = orders.o_orderkey - GROUP BY - 2, - 3, - 4, - 5 -), _t3 AS ( - SELECT - SUM(_s10.n_rows) AS sum_n_rows, - _s10.c_custkey, - _s10.c_nationkey, - _s10.o_orderpriority, - _s11.p_type - FROM _s10 AS _s10 JOIN _s1 AS _s11 - ON _s10.l_partkey = _s11.p_partkey - GROUP BY - 2, - 3, - 4, - 5 + ON _s11.p_partkey = lineitem.l_partkey ) SELECT COUNT(*) AS n @@ -62,11 +43,10 @@ JOIN tpch.supplier AS supplier JOIN tpch.orders AS orders ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1998 AND lineitem.l_orderkey = orders.o_orderkey -JOIN _t3 AS _t3 - ON _s1.p_type = _t3.p_type - AND _t3.c_custkey = orders.o_custkey - AND _t3.c_nationkey = supplier.s_nationkey - AND _t3.o_orderpriority = orders.o_orderpriority - AND _t3.sum_n_rows > 0 +JOIN _s13 AS _s13 + ON _s1.p_type = _s13.p_type + AND _s13.c_custkey = orders.o_custkey + AND _s13.c_nationkey = supplier.s_nationkey + AND _s13.o_orderpriority = orders.o_orderpriority WHERE CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1998 diff --git a/tests/test_sql_refsols/defog_broker_adv14_ansi.sql 
b/tests/test_sql_refsols/defog_broker_adv14_ansi.sql index f7e6196f3..a8afab917 100644 --- a/tests/test_sql_refsols/defog_broker_adv14_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv14_ansi.sql @@ -1,9 +1,19 @@ +WITH _s1 AS ( + SELECT + COUNT(sbdpclose) AS count_sbdpclose, + SUM(sbdpclose) AS sum_sbdpclose, + sbdptickerid + FROM main.sbdailyprice + WHERE + DATEDIFF(CURRENT_TIMESTAMP(), CAST(sbdpdate AS DATETIME), DAY) <= 7 + GROUP BY + 3 +) SELECT sbticker.sbtickertype AS ticker_type, - AVG(sbdailyprice.sbdpclose) AS ACP + SUM(_s1.sum_sbdpclose) / SUM(_s1.count_sbdpclose) AS ACP FROM main.sbticker AS sbticker -JOIN main.sbdailyprice AS sbdailyprice - ON DATEDIFF(CURRENT_TIMESTAMP(), CAST(sbdailyprice.sbdpdate AS DATETIME), DAY) <= 7 - AND sbdailyprice.sbdptickerid = sbticker.sbtickerid +JOIN _s1 AS _s1 + ON _s1.sbdptickerid = sbticker.sbtickerid GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_broker_adv14_mysql.sql b/tests/test_sql_refsols/defog_broker_adv14_mysql.sql index 2e6957884..4dee5aaa5 100644 --- a/tests/test_sql_refsols/defog_broker_adv14_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv14_mysql.sql @@ -1,9 +1,19 @@ +WITH _s1 AS ( + SELECT + COUNT(sbdpclose) AS count_sbDpClose, + SUM(sbdpclose) AS sum_sbDpClose, + sbdptickerid AS sbDpTickerId + FROM main.sbDailyPrice + WHERE + DATEDIFF(CURRENT_TIMESTAMP(), sbdpdate) <= 7 + GROUP BY + 3 +) SELECT sbTicker.sbtickertype AS ticker_type, - AVG(sbDailyPrice.sbdpclose) AS ACP + SUM(_s1.sum_sbDpClose) / SUM(_s1.count_sbDpClose) AS ACP FROM main.sbTicker AS sbTicker -JOIN main.sbDailyPrice AS sbDailyPrice - ON DATEDIFF(CURRENT_TIMESTAMP(), sbDailyPrice.sbdpdate) <= 7 - AND sbDailyPrice.sbdptickerid = sbTicker.sbtickerid +JOIN _s1 AS _s1 + ON _s1.sbDpTickerId = sbTicker.sbtickerid GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_broker_adv14_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv14_sqlite.sql index 38cfb931c..164838cf8 100644 --- 
a/tests/test_sql_refsols/defog_broker_adv14_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv14_sqlite.sql @@ -1,11 +1,21 @@ +WITH _s1 AS ( + SELECT + COUNT(sbdpclose) AS count_sbdpclose, + SUM(sbdpclose) AS sum_sbdpclose, + sbdptickerid + FROM main.sbdailyprice + WHERE + CAST(( + JULIANDAY(DATE(DATETIME('now'), 'start of day')) - JULIANDAY(DATE(sbdpdate, 'start of day')) + ) AS INTEGER) <= 7 + GROUP BY + 3 +) SELECT sbticker.sbtickertype AS ticker_type, - AVG(sbdailyprice.sbdpclose) AS ACP + CAST(SUM(_s1.sum_sbdpclose) AS REAL) / SUM(_s1.count_sbdpclose) AS ACP FROM main.sbticker AS sbticker -JOIN main.sbdailyprice AS sbdailyprice - ON CAST(( - JULIANDAY(DATE(DATETIME('now'), 'start of day')) - JULIANDAY(DATE(sbdailyprice.sbdpdate, 'start of day')) - ) AS INTEGER) <= 7 - AND sbdailyprice.sbdptickerid = sbticker.sbtickerid +JOIN _s1 AS _s1 + ON _s1.sbdptickerid = sbticker.sbtickerid GROUP BY 1 From 3041ac98e00c6bb547d88ff00bf297e7c2f66ef9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 26 Aug 2025 11:04:19 -0400 Subject: [PATCH 091/143] Stop printing cardinalities in plan files for semi/anti joins --- pydough/relational/relational_nodes/join.py | 2 ++ tests/test_plan_refsols/aggregate_anti.txt | 2 +- tests/test_plan_refsols/anti_aggregate.txt | 2 +- tests/test_plan_refsols/anti_aggregate_alternate.txt | 2 +- tests/test_plan_refsols/anti_singular.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_5.txt | 2 +- tests/test_plan_refsols/common_prefix_aa.txt | 2 +- tests/test_plan_refsols/common_prefix_ab.txt | 4 ++-- tests/test_plan_refsols/common_prefix_ac.txt | 2 +- tests/test_plan_refsols/common_prefix_z.txt | 2 +- tests/test_plan_refsols/correl_10.txt | 2 +- tests/test_plan_refsols/correl_4.txt | 2 +- tests/test_plan_refsols/correl_7.txt | 2 +- tests/test_plan_refsols/epoch_num_predawn_cold_war.txt | 2 +- tests/test_plan_refsols/multiple_has_hasnot.txt | 6 +++--- tests/test_plan_refsols/simple_anti_1.txt | 2 +- tests/test_plan_refsols/simple_anti_2.txt 
| 2 +- tests/test_plan_refsols/simple_semi_1.txt | 2 +- tests/test_plan_refsols/simple_semi_2.txt | 2 +- tests/test_plan_refsols/singular_anti.txt | 2 +- tests/test_plan_refsols/supplier_pct_national_qty.txt | 2 +- tests/test_plan_refsols/tpch_q21.txt | 2 +- tests/test_plan_refsols/tpch_q4.txt | 2 +- tests/test_plan_refsols/window_filter_order_10.txt | 2 +- 24 files changed, 28 insertions(+), 26 deletions(-) diff --git a/pydough/relational/relational_nodes/join.py b/pydough/relational/relational_nodes/join.py index 6c6a7a34d..a8401e73b 100644 --- a/pydough/relational/relational_nodes/join.py +++ b/pydough/relational/relational_nodes/join.py @@ -275,11 +275,13 @@ def to_string(self, compact: bool = False) -> str: cardinality_suffix: str = ( "" if self.cardinality == JoinCardinality.UNKNOWN_UNKNOWN + or self.join_type in (JoinType.SEMI, JoinType.ANTI) else f", cardinality={self.cardinality.name}" ) reverse_cardinality_suffix: str = ( "" if self.reverse_cardinality == JoinCardinality.UNKNOWN_UNKNOWN + or self.join_type in (JoinType.SEMI, JoinType.ANTI) else f", reverse_cardinality={self.reverse_cardinality.name}" ) return f"JOIN(condition={self.condition.to_string(compact)}, type={self.join_type.name}{cardinality_suffix}{reverse_cardinality_suffix}, columns={self.make_column_string(self.columns, compact)}{correl_suffix})" diff --git a/tests/test_plan_refsols/aggregate_anti.txt b/tests/test_plan_refsols/aggregate_anti.txt index d780bbcb5..c61c67b94 100644 --- a/tests/test_plan_refsols/aggregate_anti.txt +++ b/tests/test_plan_refsols/aggregate_anti.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) 
SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate.txt b/tests/test_plan_refsols/anti_aggregate.txt index b5feeed30..c61c67b94 100644 --- a/tests/test_plan_refsols/anti_aggregate.txt +++ b/tests/test_plan_refsols/anti_aggregate.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate_alternate.txt b/tests/test_plan_refsols/anti_aggregate_alternate.txt index 02ee0138c..c12bdd20e 100644 --- a/tests/test_plan_refsols/anti_aggregate_alternate.txt +++ b/tests/test_plan_refsols/anti_aggregate_alternate.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', 0:numeric), ('sum_price_of_10parts', None:unknown)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) 
SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_singular.txt b/tests/test_plan_refsols/anti_singular.txt index 117716d5e..da23cf491 100644 --- a/tests/test_plan_refsols/anti_singular.txt +++ b/tests/test_plan_refsols/anti_singular.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('region_name', None:unknown)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index 3feedb723..fb5de210c 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,5 +1,5 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, 
orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_aa.txt b/tests/test_plan_refsols/common_prefix_aa.txt index 6ae902b07..6c1032c23 100644 --- a/tests/test_plan_refsols/common_prefix_aa.txt +++ b/tests/test_plan_refsols/common_prefix_aa.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_ab.txt b/tests/test_plan_refsols/common_prefix_ab.txt index 323768976..14edc53f2 100644 --- a/tests/test_plan_refsols/common_prefix_ab.txt +++ b/tests/test_plan_refsols/common_prefix_ab.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, 
columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ac.txt b/tests/test_plan_refsols/common_prefix_ac.txt index 6e860bc35..3e5a7d814 100644 --- a/tests/test_plan_refsols/common_prefix_ac.txt +++ b/tests/test_plan_refsols/common_prefix_ac.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_z.txt b/tests/test_plan_refsols/common_prefix_z.txt index 3b3b5fad6..05678bd68 100644 --- a/tests/test_plan_refsols/common_prefix_z.txt +++ b/tests/test_plan_refsols/common_prefix_z.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': 
t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_10.txt b/tests/test_plan_refsols/correl_10.txt index fcb05e48c..66d96bafc 100644 --- a/tests/test_plan_refsols/correl_10.txt +++ b/tests/test_plan_refsols/correl_10.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('rname', None:unknown)], orderings=[(n_name):asc_first]) - JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=ANTI, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) PROJECT(columns={'expr_0': SLICE(r_name, None:unknown, 1:numeric, None:unknown), 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_4.txt b/tests/test_plan_refsols/correl_4.txt index 8f72568f4..e37bfac00 100644 --- a/tests/test_plan_refsols/correl_4.txt +++ b/tests/test_plan_refsols/correl_4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, 
type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t1.c_acctbal <= t0.smallest_bal + 5.0:numeric & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'smallest_bal': t0.smallest_bal}) diff --git a/tests/test_plan_refsols/correl_7.txt b/tests/test_plan_refsols/correl_7.txt index 7586ae463..d9e84b642 100644 --- a/tests/test_plan_refsols/correl_7.txt +++ b/tests/test_plan_refsols/correl_7.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('n_prefix_nations', 0:numeric)], orderings=[]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=ANTI, columns={'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt index a3a22d030..ced52113b 100644 --- a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt +++ b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_events', n_events)], orderings=[]) 
AGGREGATE(keys={}, aggregations={'n_events': COUNT()}) - JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, columns={}) JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=t_name == 'Pre-Dawn':string, columns={'t_end_hour': t_end_hour, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/multiple_has_hasnot.txt b/tests/test_plan_refsols/multiple_has_hasnot.txt index ec80d0ec2..78b9d31fd 100644 --- a/tests/test_plan_refsols/multiple_has_hasnot.txt +++ b/tests/test_plan_refsols/multiple_has_hasnot.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', p_name)], orderings=[]) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, columns={'p_name': t0.p_name}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=ANTI, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': 
t0.ps_partkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) diff --git a/tests/test_plan_refsols/simple_anti_1.txt b/tests/test_plan_refsols/simple_anti_1.txt index 1b99d8c05..c00785e14 100644 --- a/tests/test_plan_refsols/simple_anti_1.txt +++ b/tests/test_plan_refsols/simple_anti_1.txt @@ -1,4 +1,4 @@ ROOT(columns=[('name', c_name)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, columns={'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/simple_anti_2.txt b/tests/test_plan_refsols/simple_anti_2.txt index 322e6b23c..b87256acc 100644 --- a/tests/test_plan_refsols/simple_anti_2.txt +++ b/tests/test_plan_refsols/simple_anti_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/simple_semi_1.txt b/tests/test_plan_refsols/simple_semi_1.txt index 98a13ec16..dd41107a3 100644 --- a/tests/test_plan_refsols/simple_semi_1.txt +++ b/tests/test_plan_refsols/simple_semi_1.txt @@ -1,4 +1,4 @@ 
ROOT(columns=[('name', c_name)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=SEMI, columns={'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/simple_semi_2.txt b/tests/test_plan_refsols/simple_semi_2.txt index 146ebf45e..d52362ab1 100644 --- a/tests/test_plan_refsols/simple_semi_2.txt +++ b/tests/test_plan_refsols/simple_semi_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=SEMI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/singular_anti.txt b/tests/test_plan_refsols/singular_anti.txt index 7de3c3eec..0f3e69223 100644 --- a/tests/test_plan_refsols/singular_anti.txt +++ b/tests/test_plan_refsols/singular_anti.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('region_name', None:unknown)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, 
columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index 58a14443e..f2a5c7f3c 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,7 +1,7 @@ ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]))], orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[])):desc_last], limit=5:numeric) JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q21.txt 
b/tests/test_plan_refsols/tpch_q21.txt index 8e0003f34..e15d1773c 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -5,7 +5,7 @@ ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], o FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t0.anything_l_suppkey}) + JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, columns={'anything_l_suppkey': t0.anything_l_suppkey}) FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_suppkey': anything_l_suppkey, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) diff --git a/tests/test_plan_refsols/tpch_q4.txt b/tests/test_plan_refsols/tpch_q4.txt index c2fe0f9a6..3ef651d9a 100644 --- a/tests/test_plan_refsols/tpch_q4.txt +++ b/tests/test_plan_refsols/tpch_q4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('O_ORDERPRIORITY', o_orderpriority), ('ORDER_COUNT', ORDER_COUNT)], 
orderings=[(o_orderpriority):asc_first]) AGGREGATE(keys={'o_orderpriority': o_orderpriority}, aggregations={'ORDER_COUNT': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=SEMI, columns={'o_orderpriority': t0.o_orderpriority}) FILTER(condition=QUARTER(o_orderdate) == 3:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_commitdate < l_receiptdate, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index dad30206d..d3aa9b4ff 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_totalprice': t0.o_totalprice}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, columns={'o_totalprice': t0.o_totalprice}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) From efa133562465b1d0be09fb779d6e01f78201828a Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 26 Aug 2025 11:25:48 -0400 Subject: [PATCH 092/143] 
Revisions and documentation [RUN CI] [RUN MYSQL] --- pydough/conversion/hybrid_tree.py | 101 ++++++++++++------ pydough/conversion/relational_converter.py | 23 ++-- tests/test_plan_refsols/agg_max_ranking.txt | 2 +- .../agg_orders_by_year_month_just_europe.txt | 2 +- .../agg_orders_by_year_month_vs_europe.txt | 2 +- .../aggregate_mixed_levels_simple.txt | 2 +- .../aggregate_on_function_call.txt | 2 +- tests/test_plan_refsols/aggregate_semi.txt | 2 +- .../aggregate_then_backref.txt | 2 +- .../aggregation_analytics_2.txt | 2 +- .../aggregation_analytics_3.txt | 2 +- .../test_plan_refsols/avg_acctbal_wo_debt.txt | 2 +- .../avg_order_diff_per_customer.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_1.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_4.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_5.txt | 2 +- tests/test_plan_refsols/common_prefix_a.txt | 4 +- tests/test_plan_refsols/common_prefix_ad.txt | 6 +- tests/test_plan_refsols/common_prefix_ae.txt | 4 +- tests/test_plan_refsols/common_prefix_af.txt | 4 +- tests/test_plan_refsols/common_prefix_al.txt | 6 +- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_an.txt | 6 +- tests/test_plan_refsols/common_prefix_ao.txt | 6 +- tests/test_plan_refsols/common_prefix_ap.txt | 2 +- tests/test_plan_refsols/common_prefix_aq.txt | 8 +- tests/test_plan_refsols/common_prefix_b.txt | 6 +- tests/test_plan_refsols/common_prefix_c.txt | 10 +- tests/test_plan_refsols/common_prefix_d.txt | 12 +-- tests/test_plan_refsols/common_prefix_e.txt | 4 +- tests/test_plan_refsols/common_prefix_f.txt | 6 +- tests/test_plan_refsols/common_prefix_g.txt | 6 +- tests/test_plan_refsols/common_prefix_h.txt | 10 +- tests/test_plan_refsols/common_prefix_i.txt | 4 +- tests/test_plan_refsols/common_prefix_l.txt | 4 +- tests/test_plan_refsols/common_prefix_m.txt | 4 +- 
tests/test_plan_refsols/common_prefix_n.txt | 4 +- tests/test_plan_refsols/common_prefix_o.txt | 4 +- tests/test_plan_refsols/common_prefix_p.txt | 4 +- tests/test_plan_refsols/common_prefix_q.txt | 2 +- tests/test_plan_refsols/common_prefix_r.txt | 2 +- tests/test_plan_refsols/common_prefix_s.txt | 4 +- tests/test_plan_refsols/common_prefix_t.txt | 4 +- tests/test_plan_refsols/common_prefix_u.txt | 4 +- tests/test_plan_refsols/common_prefix_x.txt | 4 +- tests/test_plan_refsols/common_prefix_y.txt | 4 +- tests/test_plan_refsols/correl_1.txt | 2 +- tests/test_plan_refsols/correl_13.txt | 2 +- tests/test_plan_refsols/correl_14.txt | 2 +- tests/test_plan_refsols/correl_15.txt | 2 +- tests/test_plan_refsols/correl_2.txt | 2 +- tests/test_plan_refsols/correl_29.txt | 6 +- tests/test_plan_refsols/correl_30.txt | 4 +- tests/test_plan_refsols/correl_6.txt | 2 +- ...count_at_most_100_suppliers_per_nation.txt | 2 +- ...multiple_subcollections_alongside_aggs.txt | 4 +- .../count_single_subcollection.txt | 2 +- .../customer_largest_order_deltas.txt | 4 +- .../customer_most_recent_orders.txt | 2 +- .../customers_sum_line_price.txt | 2 +- .../test_plan_refsols/deep_best_analysis.txt | 4 +- .../epoch_users_most_cold_war_searches.txt | 2 +- .../first_order_per_customer.txt | 2 +- tests/test_plan_refsols/hour_minute_day.txt | 2 +- ...lineitems_access_cust_supplier_nations.txt | 2 +- .../lines_german_supplier_economy_part.txt | 2 +- .../mostly_positive_accounts_per_nation1.txt | 4 +- .../mostly_positive_accounts_per_nation2.txt | 4 +- .../mostly_positive_accounts_per_nation3.txt | 4 +- ...ple_simple_aggregations_multiple_calcs.txt | 4 +- ...ltiple_simple_aggregations_single_calc.txt | 4 +- .../nation_acctbal_breakdown.txt | 2 +- tests/test_plan_refsols/nation_best_order.txt | 2 +- .../nations_order_by_num_suppliers.txt | 2 +- .../nations_sum_line_price.txt | 2 +- .../num_positive_accounts_per_nation.txt | 4 +- .../orders_sum_line_price.txt | 2 +- 
.../orders_sum_vs_count_line_price.txt | 2 +- .../orders_versus_first_orders.txt | 2 +- .../parts_quantity_increase_95_96.txt | 4 +- .../quantile_function_test_2.txt | 2 +- .../quantile_function_test_3.txt | 2 +- .../quantile_function_test_4.txt | 2 +- .../rank_nations_per_region_by_customers.txt | 2 +- .../region_acctbal_breakdown.txt | 2 +- .../region_orders_from_nations_richest.txt | 2 +- .../regional_first_order_best_line_part.txt | 2 +- .../regional_suppliers_percentile.txt | 2 +- .../regions_sum_line_price.txt | 2 +- tests/test_plan_refsols/semi_aggregate.txt | 2 +- tests/test_plan_refsols/simple_var_std.txt | 2 +- tests/test_plan_refsols/singular1.txt | 2 +- tests/test_plan_refsols/singular2.txt | 2 +- tests/test_plan_refsols/singular3.txt | 2 +- tests/test_plan_refsols/singular4.txt | 2 +- tests/test_plan_refsols/singular6.txt | 2 +- tests/test_plan_refsols/singular7.txt | 6 +- .../sqlite_udf_count_epsilon.txt | 2 +- .../sqlite_udf_covar_pop.txt | 2 +- tests/test_plan_refsols/sqlite_udf_nested.txt | 2 +- .../sqlite_udf_percent_positive.txt | 4 +- .../test_plan_refsols/supplier_best_part.txt | 4 +- .../supplier_pct_national_qty.txt | 2 +- ...ograph_battery_failure_rates_anomalies.txt | 2 +- .../technograph_country_cartesian_oddball.txt | 2 +- ...chnograph_country_combination_analysis.txt | 6 +- ...nograph_country_incident_rate_analysis.txt | 12 +-- ...aph_error_percentages_sun_set_by_error.txt | 4 +- ..._error_rate_sun_set_by_factory_country.txt | 4 +- .../technograph_incident_rate_per_brand.txt | 2 +- .../technograph_most_unreliable_products.txt | 4 +- ...top_5_nations_balance_by_num_suppliers.txt | 2 +- .../top_5_nations_by_num_supplierss.txt | 2 +- .../top_customers_by_orders.txt | 2 +- tests/test_plan_refsols/tpch_q10.txt | 2 +- tests/test_plan_refsols/tpch_q13.txt | 2 +- tests/test_plan_refsols/tpch_q15.txt | 4 +- tests/test_plan_refsols/tpch_q18.txt | 4 +- tests/test_plan_refsols/tpch_q20.txt | 4 +- tests/test_plan_refsols/tpch_q21.txt | 2 +- 
tests/test_plan_refsols/tpch_q22.txt | 2 +- tests/test_plan_refsols/tpch_q8.txt | 2 +- tests/test_plan_refsols/tpch_q9.txt | 2 +- .../various_aggfuncs_simple.txt | 2 +- .../window_filter_order_1.txt | 2 +- .../window_filter_order_2.txt | 2 +- .../window_filter_order_3.txt | 2 +- .../window_filter_order_4.txt | 2 +- .../window_filter_order_8.txt | 2 +- .../window_filter_order_9.txt | 2 +- .../aggregation_functions_ansi.sql | 36 +++---- .../aggregation_functions_mysql.sql | 36 +++---- .../aggregation_functions_sqlite.sql | 36 +++---- .../defog_broker_basic4_ansi.sql | 31 ++---- .../defog_broker_basic4_mysql.sql | 31 ++---- .../defog_broker_basic4_sqlite.sql | 31 ++---- 138 files changed, 350 insertions(+), 387 deletions(-) diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index 9567d30f9..13094ea8a 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -613,41 +613,82 @@ def add_child( # Return the index of the newly created child. return new_child_idx + @staticmethod + def infer_metadata_reverse_cardinality( + metadata: SubcollectionRelationshipMetadata, + ) -> JoinCardinality: + """ + Infers the cardinality of the reverse of a join from parent to child + based on the metadata from the parent->child relationship. + + Args: + `metadata`: the metadata for the sub-collection property mapping + the parent to the child. + + Returns: + The join cardinality for the connection from the child back to the + parent, if it can be inferred. Uses `PLURAL_FILTER` as a fallback. + """ + # If there is no reverse, fall back to plural filter. + if ( + not isinstance(metadata, ReversiblePropertyMetadata) + or metadata.reverse is None + ): + return JoinCardinality.PLURAL_FILTER + + # If the reverse property exists, use its properties to + # infer if the reverse cardinality is singular or plural + # and whether a match always exists or not. 
+ cardinality: JoinCardinality = ( + JoinCardinality.PLURAL_ACCESS + if metadata.reverse.is_plural + else JoinCardinality.SINGULAR_ACCESS + ) + if not metadata.reverse.always_matches: + cardinality = cardinality.add_filter() + return cardinality + def infer_root_reverse_cardinality(self) -> JoinCardinality: """ - TODO + Infers the cardinality of the join connecting the root of the hybrid + tree to its parent context. + + Returns: + The inferred cardinality of the join connecting the root of the + hybrid tree to its parent context. """ - if self.parent is None: - match self.pipeline[0]: - case HybridRoot(): - return JoinCardinality.PLURAL_ACCESS - case HybridCollectionAccess(): - cardinality: JoinCardinality = JoinCardinality.PLURAL_ACCESS - if isinstance(self.pipeline[0].collection, SubCollection): - metadata = self.pipeline[0].collection.subcollection_property - if ( - isinstance(metadata, ReversiblePropertyMetadata) - and metadata.reverse is not None - ): - if metadata.reverse.is_plural: - cardinality = JoinCardinality.PLURAL_ACCESS - else: - cardinality = JoinCardinality.SINGULAR_ACCESS - if not metadata.reverse.always_matches: - cardinality = cardinality.add_filter() - return JoinCardinality.PLURAL_ACCESS - return cardinality - case HybridPartition(): - return self.children[0].subtree.infer_root_reverse_cardinality() - case HybridPartitionChild(): - return self.pipeline[0].subtree.infer_root_reverse_cardinality() - case _: - raise NotImplementedError( - f"Invalid start of pipeline: {self.pipeline[0].__class__.__name__}" - ) - else: + # Keep traversing upward until we find the root of the current tree. + if self.parent is not None: return self.parent.infer_root_reverse_cardinality() + # Once we find the root, infer the cardinality of the join that would + # connect just this node to the parent context. 
The rest of the nodes in + # the tree don't matter since they will not affect how many matches + # there are back to the parent context, or whether there is always a + # match or not for each record in the current context. + match self.pipeline[0]: + case HybridRoot(): + return JoinCardinality.PLURAL_ACCESS + case HybridCollectionAccess(): + # For collection accesses, that are not a sub-collection, just + # use plural access. If they are a sub-collection, infer what + # is the cardinality based on the reverse property. + if isinstance(self.pipeline[0].collection, SubCollection): + return self.infer_metadata_reverse_cardinality( + self.pipeline[0].collection.subcollection_property + ) + else: + return JoinCardinality.PLURAL_ACCESS + # For partition & partition child, infer from the underlying child. + case HybridPartition(): + return self.children[0].subtree.infer_root_reverse_cardinality() + case HybridPartitionChild(): + return self.pipeline[0].subtree.infer_root_reverse_cardinality() + case _: + raise NotImplementedError( + f"Invalid start of pipeline: {self.pipeline[0].__class__.__name__}" + ) + def add_successor(self, successor: "HybridTree") -> None: """ Marks two hybrid trees in a predecessor-successor relationship. diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 25f47acba..bb480cc27 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -18,7 +18,6 @@ SimpleJoinMetadata, SimpleTableMetadata, ) -from pydough.metadata.properties import ReversiblePropertyMetadata from pydough.qdag import ( Calculate, CollectionAccess, @@ -847,22 +846,14 @@ def translate_sub_collection( ) # Infer the cardinality of the join from the perspective of the new - # collection to the existing data. - reverse_cardinality: JoinCardinality - if ( - isinstance( - collection_access.subcollection_property, ReversiblePropertyMetadata + # collection to the existing data. 
Also, if the parent has any + # additional filters on its side that means a row may not always + # exist, then update the reverse cardinality since it may be filtering. + reverse_cardinality: JoinCardinality = ( + HybridTree.infer_metadata_reverse_cardinality( + collection_access.subcollection_property ) - and collection_access.subcollection_property.reverse is not None - ): - if collection_access.subcollection_property.reverse.is_plural: - reverse_cardinality = JoinCardinality.PLURAL_ACCESS - else: - reverse_cardinality = JoinCardinality.SINGULAR_ACCESS - if not collection_access.subcollection_property.reverse.always_matches: - reverse_cardinality = reverse_cardinality.add_filter() - else: - reverse_cardinality = JoinCardinality.PLURAL_ACCESS + ) if (not reverse_cardinality.filters) and (not parent.always_exists()): reverse_cardinality = reverse_cardinality.add_filter() diff --git a/tests/test_plan_refsols/agg_max_ranking.txt b/tests/test_plan_refsols/agg_max_ranking.txt index d73080147..69663c452 100644 --- a/tests/test_plan_refsols/agg_max_ranking.txt +++ b/tests/test_plan_refsols/agg_max_ranking.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('highest_rank', highest_rank)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'highest_rank': t1.highest_rank, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'highest_rank': t1.highest_rank, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'highest_rank': MAX(cust_rank)}) PROJECT(columns={'c_nationkey': c_nationkey, 'cust_rank': RANKING(args=[], partition=[], order=[(c_acctbal):desc_first], allow_ties=True)}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt 
b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt index 5a9021e4e..1462eb43f 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt @@ -3,7 +3,7 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', DEFAULT_ AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderdate': t0.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt index 1bc99c0d2..48b873c76 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt @@ -3,7 +3,7 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', n_rows), AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': 
YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderdate': t0.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) diff --git a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt index 81d20edcc..0b267e0e1 100644 --- a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt +++ b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt @@ -1,5 +1,5 @@ ROOT(columns=[('order_key', o_orderkey), ('max_ratio', max_ratio)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': 
t1.ps_availqty}) diff --git a/tests/test_plan_refsols/aggregate_on_function_call.txt b/tests/test_plan_refsols/aggregate_on_function_call.txt index d28ac7f15..3bb9b5dab 100644 --- a/tests/test_plan_refsols/aggregate_on_function_call.txt +++ b/tests/test_plan_refsols/aggregate_on_function_call.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_nationkey), ('avg_consumer_value', avg_consumer_value)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_consumer_value': t1.avg_consumer_value, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_consumer_value': t1.avg_consumer_value, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_consumer_value': MAX(IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal))}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 3f4f8df2b..5cebaf11b 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': 
t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice_1': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index c3fd73955..7c8d13cc8 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git 
a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 2ef53d9c2..b719a64e1 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 339aaf5d5..f29c91858 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), 
('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt index 1dbe56459..3eea3dc6d 100644 --- a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt +++ b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('avg_bal_without_debt_erasure', sum_sum_expr_1 / sum_count_expr_1)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'r_name': t0.r_name, 'sum_count_expr_1': t1.sum_count_expr_1, 'sum_sum_expr_1': t1.sum_sum_expr_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'r_name': t0.r_name, 'sum_count_expr_1': t1.sum_count_expr_1, 'sum_sum_expr_1': t1.sum_sum_expr_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_count_expr_1': SUM(count_expr_1), 'sum_sum_expr_1': SUM(sum_expr_1)}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) diff --git a/tests/test_plan_refsols/avg_order_diff_per_customer.txt b/tests/test_plan_refsols/avg_order_diff_per_customer.txt index e1703a4fb..edd73115c 100644 --- a/tests/test_plan_refsols/avg_order_diff_per_customer.txt +++ b/tests/test_plan_refsols/avg_order_diff_per_customer.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('avg_diff', avg_diff)], orderings=[(avg_diff):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) diff 
--git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index 2e05042d5..ada5bf42b 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 452929916..ea3ef915c 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 452929916..ea3ef915c 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) 
AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt b/tests/test_plan_refsols/bad_child_reuse_4.txt index edcdf502e..cdd0319db 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index fb5de210c..3bcfa3875 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': 
t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index afee2ad7c..d7614e2d9 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey 
== t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 7803949a3..33cb44ec2 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 
'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_ae.txt b/tests/test_plan_refsols/common_prefix_ae.txt index 32e124483..b52d1aa50 100644 --- a/tests/test_plan_refsols/common_prefix_ae.txt +++ b/tests/test_plan_refsols/common_prefix_ae.txt @@ -1,11 +1,11 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_customers), ('customer_name', customer_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'customer_name': t1.customer_name, 'n_customers': 
t1.n_customers, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'customer_name': t1.customer_name, 'n_customers': t1.n_customers, 'n_name': t0.n_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'customer_name': MAX(c_name), 'n_customers': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) FILTER(condition=ISIN(o_orderkey, [1070368, 1347104, 1472135, 2351457]:array[unknown]), columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_af.txt b/tests/test_plan_refsols/common_prefix_af.txt index ce89a069c..79fe346e2 100644 --- a/tests/test_plan_refsols/common_prefix_af.txt +++ b/tests/test_plan_refsols/common_prefix_af.txt @@ -1,12 +1,12 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name', max_anything_c_name)], orderings=[(n_name):asc_first]) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_anything_c_name': t1.max_anything_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_anything_c_name': t1.max_anything_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'max_anything_c_name': max_anything_c_name, 'n_rows': n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_anything_c_name': MAX(anything_c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'anything_c_name': t1.anything_c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_c_name': t1.anything_c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'n_rows': COUNT()}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 
'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index f674f8512..6f0d546f6 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,9 +1,9 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', n_rows_1)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows_1}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': 
t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index eb6b60f1f..a6a1c2691 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff 
--git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index cfab97d0b..98beec996 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', agg_1)], orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 508b413ff..aff3ce517 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric)), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) @@ -11,7 +11,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ap.txt b/tests/test_plan_refsols/common_prefix_ap.txt index 7f58c39d0..18f665258 100644 --- a/tests/test_plan_refsols/common_prefix_ap.txt +++ b/tests/test_plan_refsols/common_prefix_ap.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('supplier_name', s_name), ('supplier_quantity', ps_availqty), ('supplier_nation', n_name)], orderings=[(p_name):asc_first]) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=p_brand == 'Brand#32':string & p_size == 10:numeric & CONTAINS(p_name, 'pink':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey, 'p_size': p_size}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index 944f37615..32e6086dd 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ 
b/tests/test_plan_refsols/common_prefix_aq.txt @@ -1,13 +1,13 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier', s_name), ('best_part', p_name), ('best_quantity', ps_availqty)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(n_name):asc_last], allow_ties=False) == 1:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 74c69984a..0dde64141 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers), ('n_suppliers', n_suppliers)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) + 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT(), 'n_suppliers': SUM(n_suppliers)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 26411f439..73d643b77 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,17 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 
0:numeric)), ('n_parts', n_parts)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_parts': t1.sum_agg_22, 'n_suppliers': t1.sum_sum_expr_18, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_parts': t1.sum_agg_22, 'n_suppliers': t1.sum_sum_expr_18, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_22': SUM(agg_22), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_18': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': 
t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index c29d975b2..34be4b842 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,14 +1,14 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', DEFAULT_TO(sum_sum_expr_7, 0:numeric)), ('n_orders_95', 
DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_29': SUM(agg_29), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_29': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 
'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) 
SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_e.txt b/tests/test_plan_refsols/common_prefix_e.txt index a6acb8f40..48a7f6383 100644 --- a/tests/test_plan_refsols/common_prefix_e.txt +++ b/tests/test_plan_refsols/common_prefix_e.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index f47fe1ec5..27d6f82b4 100644 
--- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_agg_8)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_agg_8': t1.sum_agg_8}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_agg_8': t1.sum_agg_8}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT(), 'sum_agg_8': SUM(agg_8)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_8': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_8': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) 
SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 3db1c35e4..a79c04f7d 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_suppliers), ('n_nations', sum_agg_2)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': 
t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 97ab075cc..e13efa856 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,17 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', sum_n_rows), ('n_parts', sum_agg_22_1), ('n_suppliers', sum_sum_expr_18_1)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_agg_22_1': t1.sum_agg_22_1, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_18_1': t1.sum_sum_expr_18_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_agg_22_1': t1.sum_agg_22_1, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_18_1': t1.sum_sum_expr_18_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': COUNT(), 'sum_agg_22_1': SUM(agg_22), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_18_1': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 
'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index fdb64f108..cf31b1b01 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(n_rows):desc_last, (n_name):asc_first], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, 
aggregations={'n_rows': COUNT()}) FILTER(condition=MONTH(o_orderdate) == 12:numeric & YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index c03ee50fe..b44e64f3d 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,13 +1,13 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, 
type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 1ec0ef2aa..580ad9af1 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -2,10 +2,10 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 
'c_nationkey': c_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, 
columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index e7b6f51be..b43c9a861 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,7 +1,7 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_agg_11, 0:numeric))], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': 
t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index b4603c210..7e1c73858 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,7 +1,7 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'n_small_parts': sum_agg_5, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, 
columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t1.sum_agg_5, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t1.sum_agg_5, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'sum_agg_5': sum_agg_5, 'sum_p_retailprice': sum_p_retailprice}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 17bd44462..da57de9e5 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows), 
('n_parts_ordered', n_rows_1), ('n_distinct_parts', ndistinct_l_partkey)], orderings=[(ndistinct_l_partkey / n_rows_1):asc_first, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt b/tests/test_plan_refsols/common_prefix_q.txt index f08e39969..1adcbc62c 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric)), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': 
t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'p_name': t1.p_name}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index f024a6127..2f9ad6dc7 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_l_extendedprice), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'max_anything_l_extendedprice': t1.max_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_anything_l_extendedprice': t1.max_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': 
t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'max_anything_l_extendedprice': max_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_l_extendedprice': MAX(anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_n_rows': SUM(n_rows), 'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 2dbcd5430..736fcb4c2 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', n_rows), ('most_recent_order_distinct', ndistinct_l_suppkey)], orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 
'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index 873834947..1b9a18e15 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,12 +1,12 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) 
JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index 1a4c5e05e..5129a5674 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': 
t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) @@ -7,7 +7,7 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:n SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index 6283b3bc5..e1afb10cf 100644 
--- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index 2bff6eeda..a5ae2d504 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'c_name': c_name, 'n_rows': n_rows}) 
- JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index 71b6a0111..0c3125585 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('n_prefix_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_13.txt b/tests/test_plan_refsols/correl_13.txt index 5a738e623..10bc69870 100644 --- a/tests/test_plan_refsols/correl_13.txt +++ b/tests/test_plan_refsols/correl_13.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) FILTER(condition=s_nationkey <= 3:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 5341118f8..257f6b39f 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -2,7 +2,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': 
t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'supplier_avg_price': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 0795bc81e..1277ca812 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -2,7 +2,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) JOIN(condition=t1.p_retailprice < t0.global_avg_price * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, 
type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_2.txt b/tests/test_plan_refsols/correl_2.txt index 7bfc095a3..bf42b8cd6 100644 --- a/tests/test_plan_refsols/correl_2.txt +++ b/tests/test_plan_refsols/correl_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('n_selected_custs', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(n_name):asc_first]) - JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == t1.expr_1 & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == t1.expr_1 & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 78b655da0..fe5b94824 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,10 
+1,10 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_rows_1), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) @@ -13,7 +13,7 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_n SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_30.txt 
b/tests/test_plan_refsols/correl_30.txt index cda02872f..27cb4106c 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -3,7 +3,7 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) @@ -13,7 +13,7 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_6.txt b/tests/test_plan_refsols/correl_6.txt index 6f798d580..3b711b4d7 100644 --- a/tests/test_plan_refsols/correl_6.txt +++ b/tests/test_plan_refsols/correl_6.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('n_prefix_nations', n_rows)], orderings=[]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'expr_1': 
SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt index 38615974e..8106d43b0 100644 --- a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt +++ b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('n_top_suppliers', DEFAULT_TO(count_s_suppkey, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) LIMIT(limit=100:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}, orderings=[(s_acctbal):asc_last]) diff --git a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt index 223617b10..7f2716e17 100644 --- a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt +++ b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('num_customers', n_rows), ('num_suppliers', agg_3), ('customer_to_supplier_wealth_ratio', DEFAULT_TO(sum_c_acctbal, 0:numeric) / DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, 
type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_3': t1.n_rows, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_3': t1.n_rows, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/count_single_subcollection.txt b/tests/test_plan_refsols/count_single_subcollection.txt index 2934bd9b0..c815fa837 100644 --- a/tests/test_plan_refsols/count_single_subcollection.txt +++ b/tests/test_plan_refsols/count_single_subcollection.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_nationkey), ('num_customers', num_customers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'num_customers': t1.num_customers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'num_customers': 
t1.num_customers}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'num_customers': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index f147342e9..0d15bedd4 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', c_name), ('largest_diff', IFF(ABS(min_diff) > max_diff, min_diff, max_diff))], orderings=[(IFF(ABS(min_diff) > max_diff, min_diff, max_diff)):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_diff': MAX(revenue_delta), 'min_diff': MIN(revenue_delta)}) PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, 
columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index c9265fae6..0fe2509df 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('total_recent_value', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last], limit=3:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/customers_sum_line_price.txt 
b/tests/test_plan_refsols/customers_sum_line_price.txt index 799b8abe5..38916a55c 100644 --- a/tests/test_plan_refsols/customers_sum_line_price.txt +++ b/tests/test_plan_refsols/customers_sum_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', c_custkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index add80975e..a0c1f8f96 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,8 +1,8 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', key_5), ('c_bal', c_acctbal), ('cr_bal', account_balance_13), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', c_custkey)], orderings=[(n_name):asc_first], limit=10:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 
's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': 
t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt index 3115b76b7..55851831c 100644 --- a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt +++ b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt @@ -1,5 +1,5 @@ ROOT(columns=[('user_name', user_name), ('n_cold_war_searches', n_cold_war_searches)], orderings=[(n_cold_war_searches):desc_last, (user_name):asc_first], limit=3:numeric) - JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) AGGREGATE(keys={'anything_search_user_id': anything_search_user_id}, aggregations={'n_cold_war_searches': COUNT()}) AGGREGATE(keys={'search_id': search_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id)}) diff --git a/tests/test_plan_refsols/first_order_per_customer.txt b/tests/test_plan_refsols/first_order_per_customer.txt index 3842761d5..a445de391 100644 --- a/tests/test_plan_refsols/first_order_per_customer.txt +++ b/tests/test_plan_refsols/first_order_per_customer.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('first_order_date', o_orderdate), ('first_order_price', o_totalprice)], orderings=[(o_totalprice):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) FILTER(condition=c_acctbal >= 9000.0:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/hour_minute_day.txt b/tests/test_plan_refsols/hour_minute_day.txt index 294688d84..dd12ad5a3 100644 --- a/tests/test_plan_refsols/hour_minute_day.txt +++ b/tests/test_plan_refsols/hour_minute_day.txt @@ -1,5 +1,5 @@ ROOT(columns=[('transaction_id', sbTxId), ('_expr0', HOUR(sbTxDateTime)), ('_expr1', MINUTE(sbTxDateTime)), ('_expr2', SECOND(sbTxDateTime))], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git 
a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt index 589daa0bc..a013e6502 100644 --- a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt +++ b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt @@ -1,6 +1,6 @@ ROOT(columns=[('ship_year', YEAR(l_shipdate)), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', l_extendedprice * 1.0:numeric - l_discount)], orderings=[]) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) diff --git a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt index 531abfce9..1987fec13 100644 --- a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt +++ b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt @@ -1,6 +1,6 @@ ROOT(columns=[('order_key', l_orderkey), ('ship_date', l_shipdate), ('extended_price', l_extendedprice)], orderings=[]) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_shipdate': t0.l_shipdate, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_shipdate': t0.l_shipdate, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) diff 
--git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt index e8baa1741..75ee4ba1f 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt index 55cdf7f5d..ae366e0cd 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', 
count_s_suppkey), ('total_suppliers', count_s_suppkey)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index 6198ebc29..f5bf7c1f4 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index 18e86d4d9..cc5f9cfd0 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal_1': AVG(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt index a835cca09..94e0925e0 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('consumer_value', 
DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('producer_value', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/nation_acctbal_breakdown.txt b/tests/test_plan_refsols/nation_acctbal_breakdown.txt index 23594226f..baee60be6 100644 --- a/tests/test_plan_refsols/nation_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/nation_acctbal_breakdown.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'median_black_acctbal': 
t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_name': t0.n_name, 'n_red_acctbal': t1.n_red_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_name': t0.n_name, 'n_red_acctbal': t1.n_red_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index a204db9ef..cacdcf7ea 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', o_orderkey), ('order_value', o_totalprice), ('value_percentage', value_percentage)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'value_percentage': t1.value_percentage}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'value_percentage': 
t1.value_percentage}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/nations_order_by_num_suppliers.txt b/tests/test_plan_refsols/nations_order_by_num_suppliers.txt index 81fcffc76..4742eb831 100644 --- a/tests/test_plan_refsols/nations_order_by_num_suppliers.txt +++ b/tests/test_plan_refsols/nations_order_by_num_suppliers.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/nations_sum_line_price.txt b/tests/test_plan_refsols/nations_sum_line_price.txt index 6c3ad7c6f..1e4b3221c 100644 --- a/tests/test_plan_refsols/nations_sum_line_price.txt +++ b/tests/test_plan_refsols/nations_sum_line_price.txt @@ -1,5 +1,5 @@ 
ROOT(columns=[('okey', n_nationkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'l_extendedprice': t1.l_extendedprice}) diff --git a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt index add0e0f3c..58e419b53 100644 --- a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt +++ b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': 
t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/orders_sum_line_price.txt b/tests/test_plan_refsols/orders_sum_line_price.txt index e0220e6ff..0ffd28d94 100644 --- a/tests/test_plan_refsols/orders_sum_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', o_orderkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt index 455520f65..046adabcd 100644 --- a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', 
o_orderkey), ('lavg', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 915d68fb4..8ac11284d 100644 --- a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', DATEDIFF('days':string, order_date_8, o_orderdate))], orderings=[(DATEDIFF('days':string, order_date_8, o_orderdate)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index be38b3dc5..16762bac8 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': 
t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) diff --git a/tests/test_plan_refsols/quantile_function_test_2.txt b/tests/test_plan_refsols/quantile_function_test_2.txt index 163793d83..39d7a7218 100644 --- a/tests/test_plan_refsols/quantile_function_test_2.txt +++ b/tests/test_plan_refsols/quantile_function_test_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/quantile_function_test_3.txt b/tests/test_plan_refsols/quantile_function_test_3.txt index 163793d83..39d7a7218 100644 --- a/tests/test_plan_refsols/quantile_function_test_3.txt +++ b/tests/test_plan_refsols/quantile_function_test_3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git 
a/tests/test_plan_refsols/quantile_function_test_4.txt b/tests/test_plan_refsols/quantile_function_test_4.txt index b49c995d4..1fe7deeea 100644 --- a/tests/test_plan_refsols/quantile_function_test_4.txt +++ b/tests/test_plan_refsols/quantile_function_test_4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 610f76d3a..acb2cb036 100644 --- 
a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first]))], orderings=[(RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])):asc_first], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/region_acctbal_breakdown.txt b/tests/test_plan_refsols/region_acctbal_breakdown.txt index 88446e286..b6193e52f 100644 --- a/tests/test_plan_refsols/region_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/region_acctbal_breakdown.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': 
t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'median_black_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'n_black_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'n_red_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric))}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/region_orders_from_nations_richest.txt b/tests/test_plan_refsols/region_orders_from_nations_richest.txt index 3a4c4a90a..b8c4cc603 100644 --- a/tests/test_plan_refsols/region_orders_from_nations_richest.txt +++ b/tests/test_plan_refsols/region_orders_from_nations_richest.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, 
aggregations={'n_rows': COUNT()}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/regional_first_order_best_line_part.txt b/tests/test_plan_refsols/regional_first_order_best_line_part.txt index 3f1e334ca..f3af15301 100644 --- a/tests/test_plan_refsols/regional_first_order_best_line_part.txt +++ b/tests/test_plan_refsols/regional_first_order_best_line_part.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('part_name', p_name)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(l_quantity):desc_first, (l_linenumber):asc_last], allow_ties=False) == 1:numeric, columns={'l_partkey': l_partkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/regional_suppliers_percentile.txt b/tests/test_plan_refsols/regional_suppliers_percentile.txt index 93b25c240..6a369877b 100644 --- a/tests/test_plan_refsols/regional_suppliers_percentile.txt +++ b/tests/test_plan_refsols/regional_suppliers_percentile.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', s_name)], orderings=[]) FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(n_rows):asc_last, (s_name):asc_last], n_buckets=1000) == 1000:numeric, columns={'s_name': s_name}) - 
JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_name': t1.s_name, 's_suppkey': t1.s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/regions_sum_line_price.txt b/tests/test_plan_refsols/regions_sum_line_price.txt index 67a0c6401..c36ca2e5a 100644 --- a/tests/test_plan_refsols/regions_sum_line_price.txt +++ b/tests/test_plan_refsols/regions_sum_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', r_regionkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'r_regionkey': t0.r_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_regionkey': t0.r_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'n_regionkey': 
t0.n_regionkey}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt b/tests/test_plan_refsols/semi_aggregate.txt index 9d44d3f56..62c91f92a 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/simple_var_std.txt b/tests/test_plan_refsols/simple_var_std.txt index 505bad30b..a829a911a 100644 --- a/tests/test_plan_refsols/simple_var_std.txt +++ b/tests/test_plan_refsols/simple_var_std.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('var', pop_var), ('std', pop_std), ('sample_var', sample_var), ('sample_std', sample_std), ('pop_var', pop_var), ('pop_std', pop_std)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 
'pop_std': t1.pop_std, 'pop_var': t1.pop_var, 'sample_std': t1.sample_std, 'sample_var': t1.sample_var}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'pop_std': t1.pop_std, 'pop_var': t1.pop_var, 'sample_std': t1.sample_std, 'sample_var': t1.sample_var}) FILTER(condition=ISIN(n_name, ['ALGERIA', 'ARGENTINA']:array[unknown]), columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'pop_std': POPULATION_STD(s_acctbal), 'pop_var': POPULATION_VARIANCE(s_acctbal), 'sample_std': SAMPLE_STD(s_acctbal), 'sample_var': SAMPLE_VARIANCE(s_acctbal)}) diff --git a/tests/test_plan_refsols/singular1.txt b/tests/test_plan_refsols/singular1.txt index 665f6f40c..a33283726 100644 --- a/tests/test_plan_refsols/singular1.txt +++ b/tests/test_plan_refsols/singular1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('nation_4_name', n_name)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=n_nationkey == 4:numeric, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/singular2.txt b/tests/test_plan_refsols/singular2.txt index 294c7666f..0020aff21 100644 --- a/tests/test_plan_refsols/singular2.txt +++ b/tests/test_plan_refsols/singular2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', 
n_name), ('okey', o_orderkey)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) FILTER(condition=c_custkey == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/singular3.txt b/tests/test_plan_refsols/singular3.txt index e3f6787ca..bac588a67 100644 --- a/tests/test_plan_refsols/singular3.txt +++ b/tests/test_plan_refsols/singular3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) LIMIT(limit=5:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}, orderings=[(c_name):asc_first]) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index f8ce4600b..b73d59ebf 100644 --- a/tests/test_plan_refsols/singular4.txt +++ 
b/tests/test_plan_refsols/singular4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/singular6.txt b/tests/test_plan_refsols/singular6.txt index a547c658e..3e9681540 100644 --- a/tests/test_plan_refsols/singular6.txt +++ b/tests/test_plan_refsols/singular6.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('receipt_date', l_receiptdate), ('nation_name', n_name)], orderings=[(l_receiptdate):asc_first, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) FILTER(condition=c_nationkey == 4:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=PLURAL_FILTER, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 9bf93569a..e49975afb 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,11 +1,11 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(n_rows, 0:numeric)):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt index 2eff260ee..e66ff4ccf 100644 --- a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('n_cust', n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=EPSILON(c_acctbal, avg_balance, avg_balance * 0.1:numeric), columns={'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt index 3adfbe06d..2f501dbc7 100644 --- a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt +++ b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('cvp_ab_otp', ROUND(agg_0, 
3:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, o_totalprice / 1000000.0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) diff --git a/tests/test_plan_refsols/sqlite_udf_nested.txt b/tests/test_plan_refsols/sqlite_udf_nested.txt index ca252a32f..f2fcd5190 100644 --- a/tests/test_plan_refsols/sqlite_udf_nested.txt +++ b/tests/test_plan_refsols/sqlite_udf_nested.txt @@ -2,7 +2,7 @@ ROOT(columns=[('p', ROUND(percentage_expr_1, 2:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'percentage_expr_1': PERCENTAGE(DECODE3(c_mktsegment, 'BUILDING':string, POSITIVE(c_acctbal), 'MACHINERY':string, EPSILON(c_acctbal, min_bal, 500:numeric), 'HOUSEHOLD':string, INTEGER(FORMAT_DATETIME('%j':string, min_o_orderdate)) == '366':string, False:bool))}) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': min_bal, 'min_o_orderdate': min_o_orderdate}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': RELMIN(args=[c_acctbal], partition=[], order=[]), 'min_o_orderdate': min_o_orderdate, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': 
t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'min_o_orderdate': MIN(o_orderdate), 'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt index b2444dc7c..117c0bfb9 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', r_name), ('pct_cust_positive', ROUND(percentage_expr_2, 2:numeric)), ('pct_supp_positive', ROUND(percentage_expr_3, 2:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(POSITIVE(c_acctbal))}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index b9e46e082..91077088f 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('n_shipments', n_rows)], orderings=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (s_name):asc_first], limit=3:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], 
order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows_1': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index f2a5c7f3c..cf94b18f6 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,5 +1,5 @@ ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]))], 
orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[])):desc_last], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index 00dcdb1fe..f879befc3 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,6 +1,6 @@ ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt b/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt index 0a986eb55..526c2a80a 100644 --- a/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt +++ b/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', co_name), ('n_other_countries', n_other_countries)], orderings=[(co_name):asc_first]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_other_countries': t1.n_other_countries}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'co_name': t0.co_name, 'n_other_countries': t1.n_other_countries}) SCAN(table=main.COUNTRIES, columns={'co_name': co_name}) AGGREGATE(keys={}, aggregations={'n_other_countries': COUNT()}) SCAN(table=main.COUNTRIES, columns={}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index b9a9772c7..79a796b98 100644 --- 
a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,12 +1,12 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) - 
JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id, 'de_purchase_country_id': de_purchase_country_id}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index 1cf03558d..1bd54185e 100644 --- a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -1,20 +1,20 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': 
t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) AGGREGATE(keys={'de_purchase_country_id': de_purchase_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, 
columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) AGGREGATE(keys={'us_country_id': us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) diff --git a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt index b66d05d00..a402bbd57 100644 --- a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt +++ b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt @@ -1,8 +1,8 @@ ROOT(columns=[('error', er_name), ('pct', ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[(ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / 
RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric)):desc_last]) - JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) AGGREGATE(keys={'in_error_id': in_error_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'in_error_id': t0.in_error_id}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'in_error_id': t0.in_error_id}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index b2c4d2f9a..24ffa13a9 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,9 +1,9 @@ ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 
'sum_n_incidents': t1.sum_n_incidents}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_rows': n_rows, 'sum_n_incidents': DEFAULT_TO(sum_n_incidents, 0:numeric)}) AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index 7af10fc70..e3bd2bb8e 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,6 +1,6 @@ ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) AGGREGATE(keys={'pr_brand': pr_brand}, 
aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index 7abb7f5b8..763603e0b 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,8 +1,8 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) - JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) + JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 
'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt index 716f03473..e103270cd 100644 --- a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt +++ b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('total_bal', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(ordering_0):asc_last], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git 
a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt index 5983915cd..c74bef837 100644 --- a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt +++ b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/top_customers_by_orders.txt b/tests/test_plan_refsols/top_customers_by_orders.txt index f9ae80b9e..399f36eb7 100644 --- a/tests/test_plan_refsols/top_customers_by_orders.txt +++ b/tests/test_plan_refsols/top_customers_by_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('customer_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_custkey):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index 5bd99fb20..6614256b2 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (c_custkey):asc_first], limit=20:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.CUSTOMER, 
columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q13.txt b/tests/test_plan_refsols/tpch_q13.txt index 2646b1149..430e21253 100644 --- a/tests/test_plan_refsols/tpch_q13.txt +++ b/tests/test_plan_refsols/tpch_q13.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_COUNT', num_non_special_orders), ('CUSTDIST', CUSTDIST)], orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last], limit=10:numeric) AGGREGATE(keys={'num_non_special_orders': DEFAULT_TO(n_rows, 0:numeric)}, aggregations={'CUSTDIST': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index b7aa4880a..5c423740e 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -1,8 +1,8 @@ ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', DEFAULT_TO(sum_expr_3, 0:numeric))], orderings=[(s_suppkey):asc_first]) - 
JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) + JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'max_revenue': MAX(DEFAULT_TO(sum_expr_2, 0:numeric))}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(l_extendedprice * 1:numeric - l_discount)}) FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index 9a3b55e20..6f14bbecb 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), 
('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first], limit=10:numeric) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=DEFAULT_TO(sum_l_quantity, 0:numeric) > 300:numeric, columns={'l_orderkey': l_orderkey, 'sum_l_quantity': sum_l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index a09dd5caf..012ae3c85 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -1,5 +1,5 @@ ROOT(columns=[('S_NAME', s_name), 
('S_ADDRESS', s_address)], orderings=[(s_name):asc_first], limit=10:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) @@ -7,7 +7,7 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) JOIN(condition=t0.ps_partkey == t1.p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_partkey': l_partkey}, 
aggregations={'sum_l_quantity': SUM(l_quantity)}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index e15d1773c..52d68c1b0 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,5 +1,5 @@ ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (s_name):asc_first], limit=10:numeric) - JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 5c7637dd0..fad4b1cdf 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,7 +1,7 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) AGGREGATE(keys={'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 
'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) JOIN(condition=t1.c_acctbal > t0.global_avg_balance, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) diff --git a/tests/test_plan_refsols/tpch_q8.txt b/tests/test_plan_refsols/tpch_q8.txt index f54d41956..c32a5dca3 100644 --- a/tests/test_plan_refsols/tpch_q8.txt +++ b/tests/test_plan_refsols/tpch_q8.txt @@ -2,7 +2,7 @@ ROOT(columns=[('O_YEAR', O_YEAR), ('MKT_SHARE', DEFAULT_TO(sum_brazil_volume, 0: AGGREGATE(keys={'O_YEAR': YEAR(o_orderdate)}, aggregations={'sum_brazil_volume': SUM(IFF(n_name == 'BRAZIL':string, l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': t0.o_orderdate}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': 
t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) FILTER(condition=p_type == 'ECONOMY ANODIZED STEEL':string, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/tpch_q9.txt b/tests/test_plan_refsols/tpch_q9.txt index d032c5bc1..1f5e1a933 100644 --- a/tests/test_plan_refsols/tpch_q9.txt +++ b/tests/test_plan_refsols/tpch_q9.txt @@ -3,7 +3,7 @@ ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_ JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_partkey == 
t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/various_aggfuncs_simple.txt b/tests/test_plan_refsols/various_aggfuncs_simple.txt index c98d2d967..c5ca7c2ea 100644 --- a/tests/test_plan_refsols/various_aggfuncs_simple.txt +++ b/tests/test_plan_refsols/various_aggfuncs_simple.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', DEFAULT_TO(avg_c_acctbal, 0:numeric)), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal_1': COUNT(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'min_c_acctbal_1': MIN(c_acctbal), 'n_rows_1': COUNT(), 'sum_c_acctbal_1': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_1.txt b/tests/test_plan_refsols/window_filter_order_1.txt index 3e326b484..8ea95d8cf 100644 --- a/tests/test_plan_refsols/window_filter_order_1.txt +++ b/tests/test_plan_refsols/window_filter_order_1.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_2.txt b/tests/test_plan_refsols/window_filter_order_2.txt index 3e326b484..8ea95d8cf 100644 --- 
a/tests/test_plan_refsols/window_filter_order_2.txt +++ b/tests/test_plan_refsols/window_filter_order_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_3.txt b/tests/test_plan_refsols/window_filter_order_3.txt index 3e326b484..8ea95d8cf 100644 --- a/tests/test_plan_refsols/window_filter_order_3.txt +++ b/tests/test_plan_refsols/window_filter_order_3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) 
SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_4.txt b/tests/test_plan_refsols/window_filter_order_4.txt index f4240ff57..788d47b44 100644 --- a/tests/test_plan_refsols/window_filter_order_4.txt +++ b/tests/test_plan_refsols/window_filter_order_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt index 30462c7cd..ee43f0aed 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=c_acctbal < RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & ABSENT(n_rows), columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_9.txt b/tests/test_plan_refsols/window_filter_order_9.txt index 642f47f0a..189b9c99b 100644 --- a/tests/test_plan_refsols/window_filter_order_9.txt +++ b/tests/test_plan_refsols/window_filter_order_9.txt @@ -5,7 +5,7 @@ ROOT(columns=[('n', n)], orderings=[]) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) PROJECT(columns={'c_custkey': c_custkey, 'expr_0': 1:numeric, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_o_totalprice': t1.sum_o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_sql_refsols/aggregation_functions_ansi.sql b/tests/test_sql_refsols/aggregation_functions_ansi.sql index 55964db76..51d482591 100644 
--- a/tests/test_sql_refsols/aggregation_functions_ansi.sql +++ b/tests/test_sql_refsols/aggregation_functions_ansi.sql @@ -19,29 +19,25 @@ WITH _s1 AS ( STDDEV(customer.c_acctbal) AS sample_std_c_acctbal, VARIANCE(customer.c_acctbal) AS sample_variance_c_acctbal, SUM(customer.c_acctbal) AS sum_c_acctbal, - SUM(_s1.n_rows) AS sum_n_rows, - customer.c_nationkey + SUM(_s1.n_rows) AS sum_n_rows FROM tpch.customer AS customer LEFT JOIN _s1 AS _s1 ON _s1.o_custkey = customer.c_custkey GROUP BY - 13 + customer.c_nationkey ) SELECT - COALESCE(_t1.sum_c_acctbal, 0) AS sum_value, - _t1.avg_c_acctbal AS avg_value, - _t1.median_c_acctbal AS median_value, - _t1.min_c_acctbal AS min_value, - _t1.max_c_acctbal AS max_value, - _t1.agg_7 AS quantile_value, - _t1.anything_c_acctbal AS anything_value, - _t1.count_c_acctbal AS count_value, - _t1.ndistinct_c_acctbal AS count_distinct_value, - _t1.sample_variance_c_acctbal AS variance_value, - _t1.sample_std_c_acctbal AS stddev_value -FROM tpch.nation AS nation -JOIN _t1 AS _t1 - ON _t1.c_nationkey = nation.n_nationkey - AND ( - _t1.sum_n_rows = 0 OR _t1.sum_n_rows IS NULL - ) + COALESCE(sum_c_acctbal, 0) AS sum_value, + avg_c_acctbal AS avg_value, + median_c_acctbal AS median_value, + min_c_acctbal AS min_value, + max_c_acctbal AS max_value, + agg_7 AS quantile_value, + anything_c_acctbal AS anything_value, + count_c_acctbal AS count_value, + ndistinct_c_acctbal AS count_distinct_value, + sample_variance_c_acctbal AS variance_value, + sample_std_c_acctbal AS stddev_value +FROM _t1 +WHERE + sum_n_rows = 0 OR sum_n_rows IS NULL diff --git a/tests/test_sql_refsols/aggregation_functions_mysql.sql b/tests/test_sql_refsols/aggregation_functions_mysql.sql index 9e2e68cef..85f2429d5 100644 --- a/tests/test_sql_refsols/aggregation_functions_mysql.sql +++ b/tests/test_sql_refsols/aggregation_functions_mysql.sql @@ -72,27 +72,23 @@ WITH _s1 AS ( COUNT(c_acctbal) - 1 ) AS sample_variance_c_acctbal, SUM(c_acctbal) AS sum_c_acctbal, - SUM(n_rows) 
AS sum_n_rows, - c_nationkey + SUM(n_rows) AS sum_n_rows FROM _t2 GROUP BY - 13 + c_nationkey ) SELECT - COALESCE(_t1.sum_c_acctbal, 0) AS sum_value, - _t1.avg_c_acctbal AS avg_value, - _t1.avg_expr_15 AS median_value, - _t1.min_c_acctbal AS min_value, - _t1.max_c_acctbal AS max_value, - _t1.max_expr_16 AS quantile_value, - _t1.anything_c_acctbal AS anything_value, - _t1.count_c_acctbal AS count_value, - _t1.ndistinct_c_acctbal AS count_distinct_value, - _t1.sample_variance_c_acctbal AS variance_value, - _t1.sample_std_c_acctbal AS stddev_value -FROM tpch.NATION AS NATION -JOIN _t1 AS _t1 - ON NATION.n_nationkey = _t1.c_nationkey - AND ( - _t1.sum_n_rows = 0 OR _t1.sum_n_rows IS NULL - ) + COALESCE(sum_c_acctbal, 0) AS sum_value, + avg_c_acctbal AS avg_value, + avg_expr_15 AS median_value, + min_c_acctbal AS min_value, + max_c_acctbal AS max_value, + max_expr_16 AS quantile_value, + anything_c_acctbal AS anything_value, + count_c_acctbal AS count_value, + ndistinct_c_acctbal AS count_distinct_value, + sample_variance_c_acctbal AS variance_value, + sample_std_c_acctbal AS stddev_value +FROM _t1 +WHERE + sum_n_rows = 0 OR sum_n_rows IS NULL diff --git a/tests/test_sql_refsols/aggregation_functions_sqlite.sql b/tests/test_sql_refsols/aggregation_functions_sqlite.sql index b2f1e4afc..01f40f98a 100644 --- a/tests/test_sql_refsols/aggregation_functions_sqlite.sql +++ b/tests/test_sql_refsols/aggregation_functions_sqlite.sql @@ -69,27 +69,23 @@ WITH _s1 AS ( COUNT(c_acctbal) - 1 ) AS sample_variance_c_acctbal, SUM(c_acctbal) AS sum_c_acctbal, - SUM(n_rows) AS sum_n_rows, - c_nationkey + SUM(n_rows) AS sum_n_rows FROM _t2 GROUP BY - 13 + c_nationkey ) SELECT - COALESCE(_t1.sum_c_acctbal, 0) AS sum_value, - _t1.avg_c_acctbal AS avg_value, - _t1.avg_expr_15 AS median_value, - _t1.min_c_acctbal AS min_value, - _t1.max_c_acctbal AS max_value, - _t1.max_expr_16 AS quantile_value, - _t1.anything_c_acctbal AS anything_value, - _t1.count_c_acctbal AS count_value, - 
_t1.ndistinct_c_acctbal AS count_distinct_value, - _t1.sample_variance_c_acctbal AS variance_value, - _t1.sample_std_c_acctbal AS stddev_value -FROM tpch.nation AS nation -JOIN _t1 AS _t1 - ON _t1.c_nationkey = nation.n_nationkey - AND ( - _t1.sum_n_rows = 0 OR _t1.sum_n_rows IS NULL - ) + COALESCE(sum_c_acctbal, 0) AS sum_value, + avg_c_acctbal AS avg_value, + avg_expr_15 AS median_value, + min_c_acctbal AS min_value, + max_c_acctbal AS max_value, + max_expr_16 AS quantile_value, + anything_c_acctbal AS anything_value, + count_c_acctbal AS count_value, + ndistinct_c_acctbal AS count_distinct_value, + sample_variance_c_acctbal AS variance_value, + sample_std_c_acctbal AS stddev_value +FROM _t1 +WHERE + sum_n_rows = 0 OR sum_n_rows IS NULL diff --git a/tests/test_sql_refsols/defog_broker_basic4_ansi.sql b/tests/test_sql_refsols/defog_broker_basic4_ansi.sql index d078e262b..5f26d0a69 100644 --- a/tests/test_sql_refsols/defog_broker_basic4_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_basic4_ansi.sql @@ -1,31 +1,12 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS num_transactions, - sbtxcustid, - sbtxtickerid - FROM main.sbtransaction - GROUP BY - 2, - 3 -), _s2 AS ( - SELECT - SUM(_s0.num_transactions) AS num_transactions, - sbticker.sbtickertype, - _s0.sbtxcustid - FROM _s0 AS _s0 - JOIN main.sbticker AS sbticker - ON _s0.sbtxtickerid = sbticker.sbtickerid - GROUP BY - 2, - 3 -) SELECT sbcustomer.sbcuststate AS state, - _s2.sbtickertype AS ticker_type, - SUM(_s2.num_transactions) AS num_transactions -FROM _s2 AS _s2 + sbticker.sbtickertype AS ticker_type, + COUNT(*) AS num_transactions +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid JOIN main.sbcustomer AS sbcustomer - ON _s2.sbtxcustid = sbcustomer.sbcustid + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_broker_basic4_mysql.sql 
b/tests/test_sql_refsols/defog_broker_basic4_mysql.sql index 1fcd150c0..d6e7fda23 100644 --- a/tests/test_sql_refsols/defog_broker_basic4_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_basic4_mysql.sql @@ -1,31 +1,12 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS num_transactions, - sbtxcustid AS sbTxCustId, - sbtxtickerid AS sbTxTickerId - FROM main.sbTransaction - GROUP BY - 2, - 3 -), _s2 AS ( - SELECT - SUM(_s0.num_transactions) AS num_transactions, - sbTicker.sbtickertype AS sbTickerType, - _s0.sbTxCustId - FROM _s0 AS _s0 - JOIN main.sbTicker AS sbTicker - ON _s0.sbTxTickerId = sbTicker.sbtickerid - GROUP BY - 2, - 3 -) SELECT sbCustomer.sbcuststate AS state, - _s2.sbTickerType AS ticker_type, - SUM(_s2.num_transactions) AS num_transactions -FROM _s2 AS _s2 + sbTicker.sbtickertype AS ticker_type, + COUNT(*) AS num_transactions +FROM main.sbTransaction AS sbTransaction +JOIN main.sbTicker AS sbTicker + ON sbTicker.sbtickerid = sbTransaction.sbtxtickerid JOIN main.sbCustomer AS sbCustomer - ON _s2.sbTxCustId = sbCustomer.sbcustid + ON sbCustomer.sbcustid = sbTransaction.sbtxcustid GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql b/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql index d078e262b..5f26d0a69 100644 --- a/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql @@ -1,31 +1,12 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS num_transactions, - sbtxcustid, - sbtxtickerid - FROM main.sbtransaction - GROUP BY - 2, - 3 -), _s2 AS ( - SELECT - SUM(_s0.num_transactions) AS num_transactions, - sbticker.sbtickertype, - _s0.sbtxcustid - FROM _s0 AS _s0 - JOIN main.sbticker AS sbticker - ON _s0.sbtxtickerid = sbticker.sbtickerid - GROUP BY - 2, - 3 -) SELECT sbcustomer.sbcuststate AS state, - _s2.sbtickertype AS ticker_type, - SUM(_s2.num_transactions) AS num_transactions -FROM _s2 AS _s2 + sbticker.sbtickertype AS ticker_type, + COUNT(*) AS num_transactions +FROM 
main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid JOIN main.sbcustomer AS sbcustomer - ON _s2.sbtxcustid = sbcustomer.sbcustid + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid GROUP BY 1, 2 From b4e3318c5f2183628c1be463e752bcc97a4ba9dc Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 8 Sep 2025 15:54:06 -0400 Subject: [PATCH 093/143] Minor revisions --- pydough/conversion/hybrid_tree.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index fa01d2b0c..db0d3cbf8 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -619,7 +619,7 @@ def infer_metadata_reverse_cardinality( ) -> JoinCardinality: """ Infers the cardinality of the reverse of a join from parent to child - based on the metadata from the parent->child relationship. + based on the metadata of the reverse-relationship, if one exists. Args: `metadata`: the metadata for the sub-collection property mapping @@ -629,7 +629,8 @@ def infer_metadata_reverse_cardinality( The join cardinality for the connection from the child back to the parent, if it can be inferred. Uses `PLURAL_FILTER` as a fallback. """ - # If there is no reverse, fall back to plural filter. + # If there is no reverse, fall back to plural filter (which is the + # safest default assumption). if ( not isinstance(metadata, ReversiblePropertyMetadata) or metadata.reverse is None @@ -639,13 +640,16 @@ def infer_metadata_reverse_cardinality( # If the reverse property exists, use its properties to # infer if the reverse cardinality is singular or plural # and whether a match always exists or not. 
- cardinality: JoinCardinality = ( - JoinCardinality.PLURAL_ACCESS - if metadata.reverse.is_plural - else JoinCardinality.SINGULAR_ACCESS - ) - if not metadata.reverse.always_matches: - cardinality = cardinality.add_filter() + cardinality: JoinCardinality + match (metadata.reverse.is_plural, metadata.reverse.always_matches): + case (False, True): + cardinality = JoinCardinality.SINGULAR_ACCESS + case (False, False): + cardinality = JoinCardinality.SINGULAR_FILTER + case (True, True): + cardinality = JoinCardinality.PLURAL_ACCESS + case (True, False): + cardinality = JoinCardinality.PLURAL_FILTER return cardinality def infer_root_reverse_cardinality(self) -> JoinCardinality: From 091a3538585a76dd5df7c3a61db46272d108bd13 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 10 Sep 2025 14:17:39 -0400 Subject: [PATCH 094/143] Adjusting edge case for correlation extraction affecting cardinality --- pydough/conversion/hybrid_correlation_extraction.py | 6 ++++++ tests/test_plan_refsols/correl_1.txt | 2 +- tests/test_plan_refsols/correl_33.txt | 2 +- tests/test_plan_refsols/correl_8.txt | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pydough/conversion/hybrid_correlation_extraction.py b/pydough/conversion/hybrid_correlation_extraction.py index 5a1ff44e3..d27a0e918 100644 --- a/pydough/conversion/hybrid_correlation_extraction.py +++ b/pydough/conversion/hybrid_correlation_extraction.py @@ -238,6 +238,9 @@ def attempt_correlation_extraction( for _, rhs_key in new_equi_filters: bottom_subtree.agg_keys.append(rhs_key) connection.always_exists = False + connection.reverse_cardinality = ( + connection.reverse_cardinality.add_filter() + ) if len(new_general_filters) > 0: if bottom_subtree.general_join_condition is not None: @@ -262,6 +265,9 @@ def attempt_correlation_extraction( pydop.BAN, new_general_filters, BooleanType() ) connection.always_exists = False + connection.reverse_cardinality = ( + connection.reverse_cardinality.add_filter() + ) # Update the 
filter condition with the new conjunction of terms if new_conjunction != conjunction: diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index 0c3125585..352d9d69f 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('n_prefix_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_33.txt b/tests/test_plan_refsols/correl_33.txt index 57219f30d..614d9cd79 100644 --- a/tests/test_plan_refsols/correl_33.txt +++ b/tests/test_plan_refsols/correl_33.txt @@ -1,5 +1,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) - JOIN(condition=MONTH(t0.min_o_orderdate) == t1.month_o_orderdate & YEAR(t0.min_o_orderdate) == t1.year_o_orderdate, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows}) + JOIN(condition=MONTH(t0.min_o_orderdate) == t1.month_o_orderdate & YEAR(t0.min_o_orderdate) == t1.year_o_orderdate, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) AGGREGATE(keys={}, 
aggregations={'min_o_orderdate': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month_o_orderdate': MONTH(o_orderdate), 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/correl_8.txt b/tests/test_plan_refsols/correl_8.txt index 3f1326205..ed1c5cfe8 100644 --- a/tests/test_plan_refsols/correl_8.txt +++ b/tests/test_plan_refsols/correl_8.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('rname', r_name)], orderings=[(n_name):asc_first]) - JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) PROJECT(columns={'expr_0': SLICE(r_name, None:unknown, 1:numeric, None:unknown), 'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) From d903c9d9043bd123f71244dbe6a41ae7de514817 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 17 Sep 2025 15:14:58 -0400 Subject: [PATCH 095/143] temporary reversion as setup is being adjusted --- pydough/conversion/join_agg_transpose.py | 182 ++++++++---------- .../aggregation_analytics_2.txt | 8 +- .../aggregation_analytics_3.txt | 8 +- tests/test_plan_refsols/correl_30.txt | 26 +-- .../multi_partition_access_3.txt | 14 +- .../multi_partition_access_4.txt | 12 +- tests/test_sql_refsols/correl_30_sqlite.sql | 5 +- .../defog_broker_adv5_ansi.sql | 57 +++--- .../defog_broker_adv5_mysql.sql | 46 ++--- .../defog_broker_adv5_postgres.sql | 46 ++--- 
.../defog_broker_adv5_snowflake.sql | 46 ++--- .../defog_broker_adv5_sqlite.sql | 50 ++--- .../defog_dealership_adv11_ansi.sql | 22 +-- .../defog_dealership_adv11_mysql.sql | 22 +-- .../defog_dealership_adv11_postgres.sql | 22 +-- .../defog_dealership_adv11_snowflake.sql | 22 +-- .../defog_dealership_adv11_sqlite.sql | 22 +-- .../defog_dealership_gen4_ansi.sql | 30 +-- .../defog_dealership_gen4_mysql.sql | 32 +-- .../defog_dealership_gen4_postgres.sql | 30 +-- .../defog_dealership_gen4_snowflake.sql | 30 +-- .../defog_dealership_gen4_sqlite.sql | 32 +-- ...aph_incident_rate_by_release_year_ansi.sql | 9 - ...h_incident_rate_by_release_year_sqlite.sql | 9 - 24 files changed, 369 insertions(+), 413 deletions(-) diff --git a/pydough/conversion/join_agg_transpose.py b/pydough/conversion/join_agg_transpose.py index d16fec832..6f32751b1 100644 --- a/pydough/conversion/join_agg_transpose.py +++ b/pydough/conversion/join_agg_transpose.py @@ -12,15 +12,13 @@ ColumnReference, ColumnReferenceFinder, Join, + JoinCardinality, JoinType, RelationalExpression, RelationalNode, RelationalRoot, RelationalShuttle, ) -from pydough.relational.rel_util import ( - add_input_name, -) class JoinAggregateTransposeShuttle(RelationalShuttle): @@ -35,10 +33,25 @@ def reset(self): self.finder.reset() def visit_join(self, node: Join) -> RelationalNode: + result: RelationalNode | None = None + + # Attempt the transpose where the left input is an Aggregate. If it + # succeeded, use that as the result and recursively transform its + # inputs. if isinstance(node.inputs[0], Aggregate): - return self.generic_visit_inputs( - self.join_aggregate_transpose(node, node.inputs[0]) - ) + result = self.join_aggregate_transpose(node, node.inputs[0], True) + if result is not None: + return self.generic_visit_inputs(result) + + # If the attempt failed, then attempt the transpose where the right + # input is an Aggregate. If this attempt succeeded, use that as the + # result and recursively transform its inputs. 
+ if isinstance(node.inputs[1], Aggregate): + result = self.join_aggregate_transpose(node, node.inputs[1], False) + if result is not None: + return self.generic_visit_inputs(result) + + # If this attempt failed, fall back to the regular implementation. return super().visit_join(node) def generate_name(self, base: str, used_names: Iterable[str]) -> str: @@ -57,19 +70,22 @@ def generate_name(self, base: str, used_names: Iterable[str]) -> str: i += 1 def join_aggregate_transpose( - self, join: Join, aggregate: Aggregate - ) -> RelationalNode: + self, join: Join, aggregate: Aggregate, is_left: bool + ) -> RelationalNode | None: """ Transposes a Join above an Aggregate into an Aggregate above a Join, - when possible. + when possible and it would be better for performance to use the join + first to filter some of the rows before aggregating. Args: `join`: the Join node above the Aggregate. `aggregate`: the Aggregate node that is the left input to the Join. + `is_left`: whether the Aggregate is the left input to the Join + (True) or the right input (False). Returns: - The new RelationalNode tree with the Join and Aggregate transposed, or - the original Join if the transpose is not possible. + The new RelationalNode tree with the Join and Aggregate transposed, + or None if the transpose is not possible. """ # Verify that the join is an inner, left, or semi-join, and that the # join cardinality is singular (unless the aggregations are not affected @@ -78,115 +94,79 @@ def join_aggregate_transpose( call.op in (pydop.MIN, pydop.MAX, pydop.ANYTHING, pydop.NDISTINCT) for call in aggregate.aggregations.values() ) + + # The cardinality with regards to the input being considered must be + # singular (unless the aggregations allow plural), and must be + # filtering (since the point of joining before aggregation is to reduce + # the number of rows to aggregate). 
+ cardinality: JoinCardinality = ( + join.cardinality if is_left else join.reverse_cardinality + ) + + # Verify the cardinality meets the specified criteria, and that the join + # type is INNER/SEMI (since LEFT would not be filtering), where SEMI is + # only allowed if the aggregation is on the left. if not ( - join.join_type in (JoinType.INNER, JoinType.SEMI) - and (join.cardinality.singular or aggs_allow_plural) + ( + (join.join_type == JoinType.INNER) + or (join.join_type == JoinType.SEMI and is_left) + ) + and cardinality.filters + and (cardinality.singular or aggs_allow_plural) ): - return join + return None + + # The alias of the input to the join that corresponds to the + # aggregate. + desired_alias: str | None = ( + join.default_input_aliases[0] if is_left else join.default_input_aliases[1] + ) # Find all of the columns used in the join condition that come from the - # left-hand side of the join. + # aggregate side of the join self.finder.reset() join.condition.accept(self.finder) - lhs_condition_columns: set[ColumnReference] = { + agg_condition_columns: set[ColumnReference] = { col for col in self.finder.get_column_references() - if col.input_name == join.default_input_aliases[0] + if col.input_name == desired_alias } - # Verify that there is at least one left hand side condition column, - # and all of them are grouping keys in the aggregate. - if len(lhs_condition_columns) == 0 or any( - col.name not in aggregate.keys for col in lhs_condition_columns + # Verify ALL of the condition columns from that side of the join are + # in the aggregate keys. 
+ if len(agg_condition_columns) == 0 or any( + col.name not in aggregate.keys for col in agg_condition_columns ): - return join - - reverse_join_columns: dict[str, RelationalExpression] = {} - for join_col_name, join_col_expr in join.columns.items(): - assert isinstance(join_col_expr, ColumnReference) - reverse_join_columns[join_col_expr.name] = ColumnReference( - join_col_name, join_col_expr.data_type - ) + return None new_join_columns: dict[str, RelationalExpression] = {} - new_key_columns: dict[str, RelationalExpression] = {} - new_aggregate_columns: dict[str, CallExpression] = {} - used_column_names: set[str] = set() - - for col_name, col_expr in join.columns.items(): - self.finder.reset() - col_expr.accept(self.finder) - if all( - expr.input_name == join.default_input_aliases[1] - for expr in self.finder.get_column_references() - ): - new_join_columns[col_name] = col_expr - new_aggregate_columns[col_name] = CallExpression( - pydop.ANYTHING, - col_expr.data_type, - [ColumnReference(col_name, col_expr.data_type)], - ) - used_column_names.add(col_name) - elif not ( - isinstance(col_expr, ColumnReference) - and col_expr.input_name == join.default_input_aliases[0] - ): - return join - - for key_name, key_expr in aggregate.keys.items(): - new_join_columns[key_name] = add_input_name( - key_expr, join.default_input_aliases[0] - ) - agg_key_name: str = self.generate_name(key_name, used_column_names) - new_key_columns[agg_key_name] = ColumnReference( - key_name, col_expr.data_type - ) - used_column_names.add(agg_key_name) - - for agg_name, agg_expr in aggregate.aggregations.items(): - new_inputs: list[RelationalExpression] = [] - for input_expr in agg_expr.inputs: - join_name: str - if isinstance(input_expr, ColumnReference): - join_name = self.generate_name(input_expr.name, new_join_columns) - else: - join_name = self.generate_name("expr", new_join_columns) - new_join_columns[join_name] = add_input_name( - input_expr, join.default_input_aliases[0] - ) - 
new_inputs.append(ColumnReference(join_name, input_expr.data_type)) - agg_name = self.generate_name(agg_name, used_column_names) - if new_inputs != agg_expr.inputs: - agg_expr = CallExpression( - agg_expr.op, - agg_expr.data_type, - new_inputs, - ) - new_aggregate_columns[agg_name] = agg_expr - used_column_names.add(agg_name) + new_aggregate_aggs: dict[str, CallExpression] = {} + new_aggregate_keys: dict[str, RelationalExpression] = {} + + new_condition: RelationalExpression = join.condition + agg_input: RelationalNode = aggregate.inputs[0] + non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0] + new_join_inputs: list[RelationalNode] = ( + [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] + ) + + # TODO: FINISH THIS + return None new_join: Join = Join( - inputs=[aggregate.inputs[0], join.inputs[1]], - condition=join.condition, - columns=new_join_columns, - join_type=join.join_type, - cardinality=join.cardinality, + new_join_inputs, + new_condition, + join.join_type, + new_join_columns, + join.cardinality, + join.reverse_cardinality, + join.correl_name, ) - new_aggregate = Aggregate( - input=new_join, keys=new_key_columns, aggregations=new_aggregate_columns + new_aggregate: Aggregate = Aggregate( + new_join, new_aggregate_keys, new_aggregate_aggs ) - # print() - # print(join.to_tree_string()) - # print(lhs_condition_columns) - # print(new_join_columns) - # print(new_key_columns) - # print(new_aggregate_columns) - # print(new_aggregate.to_tree_string()) - # breakpoint() - # return join - return new_aggregate diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index c605d16b7..b719a64e1 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('part_name', anything_p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], 
orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (anything_p_name):asc_first], limit=4:numeric) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'l_tax': t0.l_tax, 'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) +ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) @@ -11,4 +11,4 @@ ROOT(columns=[('part_name', anything_p_name), ('revenue_generated', ROUND(DEFAUL SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 3740c9c7d..f29c91858 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('part_name', anything_p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (anything_p_name):asc_first], limit=3:numeric) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': 
t0.l_quantity, 'l_tax': t0.l_tax, 'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) +ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) @@ -11,4 +11,4 @@ ROOT(columns=[('part_name', anything_p_name), ('revenue_ratio', ROUND(DEFAULT_TO SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': 
l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index f311b3bfb..0912d4959 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', anything_n_rows)], orderings=[(anything_lower_r_name):asc_first, (anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_lower_r_name': ANYTHING(LOWER(r_name)), 'anything_n_name': ANYTHING(n_name), 'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'r_name': t0.r_name}) +ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_above_avg_suppliers)], orderings=[(anything_lower_r_name):asc_first, (anything_n_name):asc_first]) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_lower_r_name': t0.anything_lower_r_name, 'anything_n_name': t0.anything_n_name, 'n_above_avg_suppliers': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, 
aggregations={'anything_lower_r_name': ANYTHING(LOWER(r_name)), 'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -10,13 +10,13 @@ ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - 
SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index 
b6041588b..3203eb669 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,12 +1,12 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) - JOIN(condition=t1.sbDpClose < t0.max_anything_sbDpClose & t0.anything_sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) - AGGREGATE(keys={'anything_sbTickerType': anything_sbTickerType}, aggregations={'max_anything_sbDpClose': MAX(anything_sbDpClose)}) - AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'anything_sbDpClose': ANYTHING(sbDpClose), 'anything_sbTickerType': ANYTHING(sbTickerType)}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t0.sbDpTickerId, 'sbTickerType': t1.sbTickerType}) + JOIN(condition=t1.sbDpClose < t0.max_sbDpClose & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) + AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'max_sbDpClose': MAX(sbDpClose)}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) + AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': 
sbTickerType}) - SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) + SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.max_sbDpClose, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'max_sbDpClose': MAX(sbDpClose)}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index 9141bb5ec..f7be3c1ff 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId_0 == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.anything_max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) - AGGREGATE(keys={'sbTxCustId_0': sbTxCustId}, aggregations={'anything_max_sbTxShares': ANYTHING(max_sbTxShares), 'anything_sbTxTickerId': ANYTHING(sbTxTickerId)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': 
t1.sbTxTickerId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) + JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'max_max_sbTxShares': t0.max_sbTxShares, 'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_sql_refsols/correl_30_sqlite.sql b/tests/test_sql_refsols/correl_30_sqlite.sql index dac0716dc..e0e7d6846 100644 --- a/tests/test_sql_refsols/correl_30_sqlite.sql +++ b/tests/test_sql_refsols/correl_30_sqlite.sql @@ -65,10 +65,7 @@ SELECT _s13.n_rows AS n_above_avg_suppliers FROM _s12 AS _s12 JOIN _s13 AS _s13 - ON _s13.n_nationkey = anything_n_nationkey -GROUP BY - nation.n_nationkey, - n_nationkey + ON _s12.n_nationkey = _s13.n_nationkey ORDER BY 1, 2 diff --git 
a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql index d8cd77ce8..e019f8e82 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql @@ -1,45 +1,42 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), + EXTRACT(YEAR FROM CAST(sbdpdate AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), 1, 2) - ELSE SUBSTRING( - CONCAT('00', EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))), - ( - 2 * -1 - ) - ) + WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))), ( + 2 * -1 + )) END ) AS month, - ANY_VALUE(sbticker.sbtickersymbol) AS anything_sbtickersymbol, - COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, - MAX(sbdailyprice.sbdphigh) AS max_sbdphigh, - MIN(sbdailyprice.sbdplow) AS min_sbdplow, - SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose - FROM main.sbdailyprice AS sbdailyprice - JOIN main.sbticker AS sbticker - ON sbdailyprice.sbdptickerid = sbticker.sbtickerid + sbdptickerid, + COUNT(sbdpclose) AS count_sbdpclose, + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow, + SUM(sbdpclose) AS sum_sbdpclose + FROM main.sbdailyprice GROUP BY - sbdailyprice.sbdptickerid, - 1 + 1, + 2 ), _t0 AS ( SELECT - anything_sbtickersymbol, - month, - MAX(max_sbdphigh) AS max_max_sbdphigh, - MIN(min_sbdplow) AS min_min_sbdplow, - SUM(count_sbdpclose) AS sum_count_sbdpclose, - SUM(sum_sbdpclose) AS sum_sum_sbdpclose - FROM _t1 + _s0.month, + sbticker.sbtickersymbol, + MAX(_s0.max_sbdphigh) AS max_max_sbdphigh, + MIN(_s0.min_sbdplow) AS min_min_sbdplow, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + 
SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbdptickerid = sbticker.sbtickerid GROUP BY 1, 2 ) SELECT - anything_sbtickersymbol AS symbol, + sbtickersymbol AS symbol, month, sum_sum_sbdpclose / sum_count_sbdpclose AS avg_close, max_max_sbdphigh AS max_high, @@ -47,6 +44,6 @@ SELECT ( ( sum_sum_sbdpclose / sum_count_sbdpclose - ) - LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month NULLS LAST) - ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month NULLS LAST) AS momc + ) - LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) + ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_adv5_mysql.sql b/tests/test_sql_refsols/defog_broker_adv5_mysql.sql index fb9e56e78..6a857153f 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_mysql.sql @@ -1,36 +1,36 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(sbDailyPrice.sbdpdate AS DATETIME)), - LPAD(EXTRACT(MONTH FROM CAST(sbDailyPrice.sbdpdate AS DATETIME)), 2, '0') + EXTRACT(YEAR FROM CAST(sbdpdate AS DATETIME)), + LPAD(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME)), 2, '0') ) AS month, - ANY_VALUE(sbTicker.sbtickersymbol) AS anything_sbTickerSymbol, - COUNT(sbDailyPrice.sbdpclose) AS count_sbDpClose, - MAX(sbDailyPrice.sbdphigh) AS max_sbDpHigh, - MIN(sbDailyPrice.sbdplow) AS min_sbDpLow, - SUM(sbDailyPrice.sbdpclose) AS sum_sbDpClose - FROM main.sbDailyPrice AS sbDailyPrice - JOIN main.sbTicker AS sbTicker - ON sbDailyPrice.sbdptickerid = sbTicker.sbtickerid + sbdptickerid AS sbDpTickerId, + COUNT(sbdpclose) AS count_sbDpClose, + MAX(sbdphigh) AS max_sbDpHigh, + MIN(sbdplow) AS 
min_sbDpLow, + SUM(sbdpclose) AS sum_sbDpClose + FROM main.sbDailyPrice GROUP BY - sbDailyPrice.sbdptickerid, - 1 + 1, + 2 ), _t0 AS ( SELECT - anything_sbTickerSymbol, - month, - MAX(max_sbDpHigh) AS max_max_sbDpHigh, - MIN(min_sbDpLow) AS min_min_sbDpLow, - SUM(count_sbDpClose) AS sum_count_sbDpClose, - SUM(sum_sbDpClose) AS sum_sum_sbDpClose - FROM _t1 + _s0.month, + sbTicker.sbtickersymbol AS sbTickerSymbol, + MAX(_s0.max_sbDpHigh) AS max_max_sbDpHigh, + MIN(_s0.min_sbDpLow) AS min_min_sbDpLow, + SUM(_s0.count_sbDpClose) AS sum_count_sbDpClose, + SUM(_s0.sum_sbDpClose) AS sum_sum_sbDpClose + FROM _s0 AS _s0 + JOIN main.sbTicker AS sbTicker + ON _s0.sbDpTickerId = sbTicker.sbtickerid GROUP BY 1, 2 ) SELECT - anything_sbTickerSymbol AS symbol, + sbTickerSymbol AS symbol, month, sum_sum_sbDpClose / sum_count_sbDpClose AS avg_close, max_max_sbDpHigh AS max_high, @@ -38,6 +38,6 @@ SELECT ( ( sum_sum_sbDpClose / sum_count_sbDpClose - ) - LAG(sum_sum_sbDpClose / sum_count_sbDpClose, 1) OVER (PARTITION BY anything_sbTickerSymbol ORDER BY CASE WHEN month COLLATE utf8mb4_bin IS NULL THEN 1 ELSE 0 END, month COLLATE utf8mb4_bin) - ) / LAG(sum_sum_sbDpClose / sum_count_sbDpClose, 1) OVER (PARTITION BY anything_sbTickerSymbol ORDER BY CASE WHEN month COLLATE utf8mb4_bin IS NULL THEN 1 ELSE 0 END, month COLLATE utf8mb4_bin) AS momc + ) - LAG(sum_sum_sbDpClose / sum_count_sbDpClose, 1) OVER (PARTITION BY sbTickerSymbol ORDER BY CASE WHEN month COLLATE utf8mb4_bin IS NULL THEN 1 ELSE 0 END, month COLLATE utf8mb4_bin) + ) / LAG(sum_sum_sbDpClose / sum_count_sbDpClose, 1) OVER (PARTITION BY sbTickerSymbol ORDER BY CASE WHEN month COLLATE utf8mb4_bin IS NULL THEN 1 ELSE 0 END, month COLLATE utf8mb4_bin) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_adv5_postgres.sql b/tests/test_sql_refsols/defog_broker_adv5_postgres.sql index e206b89f7..738776d67 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_postgres.sql +++ 
b/tests/test_sql_refsols/defog_broker_adv5_postgres.sql @@ -1,36 +1,36 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(sbdailyprice.sbdpdate AS TIMESTAMP)), - LPAD(CAST(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS TIMESTAMP)) AS TEXT), 2, '0') + EXTRACT(YEAR FROM CAST(sbdpdate AS TIMESTAMP)), + LPAD(CAST(EXTRACT(MONTH FROM CAST(sbdpdate AS TIMESTAMP)) AS TEXT), 2, '0') ) AS month, - MAX(sbticker.sbtickersymbol) AS anything_sbtickersymbol, - COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, - MAX(sbdailyprice.sbdphigh) AS max_sbdphigh, - MIN(sbdailyprice.sbdplow) AS min_sbdplow, - SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose - FROM main.sbdailyprice AS sbdailyprice - JOIN main.sbticker AS sbticker - ON sbdailyprice.sbdptickerid = sbticker.sbtickerid + sbdptickerid, + COUNT(sbdpclose) AS count_sbdpclose, + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow, + SUM(sbdpclose) AS sum_sbdpclose + FROM main.sbdailyprice GROUP BY - sbdailyprice.sbdptickerid, - 1 + 1, + 2 ), _t0 AS ( SELECT - anything_sbtickersymbol, - month, - MAX(max_sbdphigh) AS max_max_sbdphigh, - MIN(min_sbdplow) AS min_min_sbdplow, - SUM(count_sbdpclose) AS sum_count_sbdpclose, - SUM(sum_sbdpclose) AS sum_sum_sbdpclose - FROM _t1 + _s0.month, + sbticker.sbtickersymbol, + MAX(_s0.max_sbdphigh) AS max_max_sbdphigh, + MIN(_s0.min_sbdplow) AS min_min_sbdplow, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbdptickerid = sbticker.sbtickerid GROUP BY 1, 2 ) SELECT - anything_sbtickersymbol AS symbol, + sbtickersymbol AS symbol, month, CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose AS avg_close, max_max_sbdphigh AS max_high, @@ -38,6 +38,6 @@ SELECT CAST(( ( CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose - ) - LAG(CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose, 1) OVER (PARTITION BY 
anything_sbtickersymbol ORDER BY month) - ) AS DOUBLE PRECISION) / LAG(CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month) AS momc + ) - LAG(CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) + ) AS DOUBLE PRECISION) / LAG(CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql index 29611e6f5..f37e844f0 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql @@ -1,36 +1,36 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT CONCAT_WS( '-', - YEAR(CAST(sbdailyprice.sbdpdate AS TIMESTAMP)), - LPAD(MONTH(CAST(sbdailyprice.sbdpdate AS TIMESTAMP)), 2, '0') + YEAR(CAST(sbdpdate AS TIMESTAMP)), + LPAD(MONTH(CAST(sbdpdate AS TIMESTAMP)), 2, '0') ) AS month, - ANY_VALUE(sbticker.sbtickersymbol) AS anything_sbtickersymbol, - COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, - MAX(sbdailyprice.sbdphigh) AS max_sbdphigh, - MIN(sbdailyprice.sbdplow) AS min_sbdplow, - SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose - FROM main.sbdailyprice AS sbdailyprice - JOIN main.sbticker AS sbticker - ON sbdailyprice.sbdptickerid = sbticker.sbtickerid + sbdptickerid, + COUNT(sbdpclose) AS count_sbdpclose, + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow, + SUM(sbdpclose) AS sum_sbdpclose + FROM main.sbdailyprice GROUP BY - sbdailyprice.sbdptickerid, - 1 + 1, + 2 ), _t0 AS ( SELECT - anything_sbtickersymbol, - month, - MAX(max_sbdphigh) AS max_max_sbdphigh, - MIN(min_sbdplow) AS min_min_sbdplow, - SUM(count_sbdpclose) AS sum_count_sbdpclose, - SUM(sum_sbdpclose) AS sum_sum_sbdpclose - FROM _t1 + _s0.month, + sbticker.sbtickersymbol, + MAX(_s0.max_sbdphigh) AS 
max_max_sbdphigh, + MIN(_s0.min_sbdplow) AS min_min_sbdplow, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbdptickerid = sbticker.sbtickerid GROUP BY 1, 2 ) SELECT - anything_sbtickersymbol AS symbol, + sbtickersymbol AS symbol, month, sum_sum_sbdpclose / sum_count_sbdpclose AS avg_close, max_max_sbdphigh AS max_high, @@ -38,6 +38,6 @@ SELECT ( ( sum_sum_sbdpclose / sum_count_sbdpclose - ) - LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month) - ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month) AS momc + ) - LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) + ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql index 76d37eb0c..57499068b 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql @@ -1,42 +1,42 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', sbdailyprice.sbdpdate) AS INTEGER), + CAST(STRFTIME('%Y', sbdpdate) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', sbdpdate) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', sbdpdate) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdpdate) AS INTEGER), ( 2 * -1 )) END ) AS month, - MAX(sbticker.sbtickersymbol) AS anything_sbtickersymbol, - COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, - 
MAX(sbdailyprice.sbdphigh) AS max_sbdphigh, - MIN(sbdailyprice.sbdplow) AS min_sbdplow, - SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose - FROM main.sbdailyprice AS sbdailyprice - JOIN main.sbticker AS sbticker - ON sbdailyprice.sbdptickerid = sbticker.sbtickerid + sbdptickerid, + COUNT(sbdpclose) AS count_sbdpclose, + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow, + SUM(sbdpclose) AS sum_sbdpclose + FROM main.sbdailyprice GROUP BY - sbdailyprice.sbdptickerid, - 1 + 1, + 2 ), _t0 AS ( SELECT - anything_sbtickersymbol, - month, - MAX(max_sbdphigh) AS max_max_sbdphigh, - MIN(min_sbdplow) AS min_min_sbdplow, - SUM(count_sbdpclose) AS sum_count_sbdpclose, - SUM(sum_sbdpclose) AS sum_sum_sbdpclose - FROM _t1 + _s0.month, + sbticker.sbtickersymbol, + MAX(_s0.max_sbdphigh) AS max_max_sbdphigh, + MIN(_s0.min_sbdplow) AS min_min_sbdplow, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbdptickerid = sbticker.sbtickerid GROUP BY 1, 2 ) SELECT - anything_sbtickersymbol AS symbol, + sbtickersymbol AS symbol, month, CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose AS avg_close, max_max_sbdphigh AS max_high, @@ -44,6 +44,6 @@ SELECT CAST(( ( CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose - ) - LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month) - ) AS REAL) / LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month) AS momc + ) - LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) + ) AS REAL) / LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql index 
fd35dfa0a..fa322a6fc 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql @@ -1,19 +1,19 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT - ANY_VALUE(cars.cost) AS anything_cost, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 + EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 GROUP BY - sales.car_id + 1 ) SELECT ( ( - COALESCE(SUM(sum_sale_price), 0) - COALESCE(SUM(anything_cost), 0) - ) / COALESCE(SUM(anything_cost), 0) + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _t1 +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv11_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv11_mysql.sql index fd35dfa0a..fa322a6fc 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_mysql.sql @@ -1,19 +1,19 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT - ANY_VALUE(cars.cost) AS anything_cost, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 + EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 GROUP BY - sales.car_id + 1 ) SELECT ( ( - COALESCE(SUM(sum_sale_price), 0) - COALESCE(SUM(anything_cost), 0) - ) / COALESCE(SUM(anything_cost), 0) + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _t1 +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git 
a/tests/test_sql_refsols/defog_dealership_adv11_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv11_postgres.sql index ae777ae80..c923f25aa 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_postgres.sql @@ -1,19 +1,19 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT - MAX(cars.cost) AS anything_cost, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS TIMESTAMP)) = 2023 + EXTRACT(YEAR FROM CAST(sale_date AS TIMESTAMP)) = 2023 GROUP BY - sales.car_id + 1 ) SELECT ( CAST(( - COALESCE(SUM(sum_sale_price), 0) - COALESCE(SUM(anything_cost), 0) - ) AS DOUBLE PRECISION) / COALESCE(SUM(anything_cost), 0) + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) AS DOUBLE PRECISION) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _t1 +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql index 56a1c04c0..22a6080ca 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql @@ -1,19 +1,19 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT - ANY_VALUE(cars.cost) AS anything_cost, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - YEAR(CAST(sales.sale_date AS TIMESTAMP)) = 2023 + YEAR(CAST(sale_date AS TIMESTAMP)) = 2023 GROUP BY - sales.car_id + 1 ) SELECT ( ( - COALESCE(SUM(sum_sale_price), 0) - COALESCE(SUM(anything_cost), 0) - ) / COALESCE(SUM(anything_cost), 0) + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) / 
COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _t1 +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql index f515a9cbb..8309fef26 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql @@ -1,19 +1,19 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT - MAX(cars.cost) AS anything_cost, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - CAST(STRFTIME('%Y', sales.sale_date) AS INTEGER) = 2023 + CAST(STRFTIME('%Y', sale_date) AS INTEGER) = 2023 GROUP BY - sales.car_id + 1 ) SELECT ( CAST(( - COALESCE(SUM(sum_sale_price), 0) - COALESCE(SUM(anything_cost), 0) - ) AS REAL) / COALESCE(SUM(anything_cost), 0) + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) AS REAL) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _t1 +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql index 3e9cb8d22..5acfd1845 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql @@ -1,29 +1,29 @@ -WITH _t2 AS ( +WITH _s0 AS ( SELECT - DATE_TRUNC('QUARTER', CAST(sales.sale_date AS TIMESTAMP)) AS quarter, - ANY_VALUE(customers.state) AS anything_state, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.customers AS customers - ON customers._id = sales.customer_id + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, + customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 + EXTRACT(YEAR FROM 
CAST(sale_date AS DATETIME)) = 2023 GROUP BY - sales.customer_id, - 1 + 1, + 2 ), _t1 AS ( SELECT - anything_state, - quarter, - SUM(sum_sale_price) AS sum_sum_sale_price - FROM _t2 + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id GROUP BY 1, 2 ) SELECT quarter, - anything_state AS customer_state, + state AS customer_state, sum_sum_sale_price AS total_sales FROM _t1 WHERE diff --git a/tests/test_sql_refsols/defog_dealership_gen4_mysql.sql b/tests/test_sql_refsols/defog_dealership_gen4_mysql.sql index 4c6e93a0b..7233adba8 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_mysql.sql @@ -1,37 +1,37 @@ -WITH _t2 AS ( +WITH _s0 AS ( SELECT STR_TO_DATE( CONCAT( - YEAR(CAST(sales.sale_date AS DATETIME)), + YEAR(CAST(sale_date AS DATETIME)), ' ', - QUARTER(CAST(sales.sale_date AS DATETIME)) * 3 - 2, + QUARTER(CAST(sale_date AS DATETIME)) * 3 - 2, ' 1' ), '%Y %c %e' ) AS quarter, - ANY_VALUE(customers.state) AS anything_state, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.customers AS customers - ON customers._id = sales.customer_id + customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 + EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 GROUP BY - sales.customer_id, - 1 + 1, + 2 ), _t1 AS ( SELECT - anything_state, - quarter, - SUM(sum_sale_price) AS sum_sum_sale_price - FROM _t2 + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id GROUP BY 1, 2 ) SELECT quarter, - anything_state COLLATE utf8mb4_bin AS customer_state, + state COLLATE utf8mb4_bin AS customer_state, sum_sum_sale_price AS total_sales FROM _t1 WHERE diff --git 
a/tests/test_sql_refsols/defog_dealership_gen4_postgres.sql b/tests/test_sql_refsols/defog_dealership_gen4_postgres.sql index 7b48e2ed5..858012735 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_postgres.sql @@ -1,29 +1,29 @@ -WITH _t2 AS ( +WITH _s0 AS ( SELECT - DATE_TRUNC('QUARTER', CAST(sales.sale_date AS TIMESTAMP)) AS quarter, - MAX(customers.state) AS anything_state, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.customers AS customers - ON customers._id = sales.customer_id + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, + customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS TIMESTAMP)) = 2023 + EXTRACT(YEAR FROM CAST(sale_date AS TIMESTAMP)) = 2023 GROUP BY - sales.customer_id, - 1 + 1, + 2 ), _t1 AS ( SELECT - anything_state, - quarter, - SUM(sum_sale_price) AS sum_sum_sale_price - FROM _t2 + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id GROUP BY 1, 2 ) SELECT quarter, - anything_state AS customer_state, + state AS customer_state, sum_sum_sale_price AS total_sales FROM _t1 WHERE diff --git a/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql index 2c0414fc9..f9b3c5bb4 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql @@ -1,29 +1,29 @@ -WITH _t2 AS ( +WITH _s0 AS ( SELECT - DATE_TRUNC('QUARTER', CAST(sales.sale_date AS TIMESTAMP)) AS quarter, - ANY_VALUE(customers.state) AS anything_state, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.customers AS customers - ON customers._id = sales.customer_id + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, + 
customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - YEAR(CAST(sales.sale_date AS TIMESTAMP)) = 2023 + YEAR(CAST(sale_date AS TIMESTAMP)) = 2023 GROUP BY - sales.customer_id, - 1 + 1, + 2 ), _t1 AS ( SELECT - anything_state, - quarter, - SUM(sum_sale_price) AS sum_sum_sale_price - FROM _t2 + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id GROUP BY 1, 2 ) SELECT quarter, - anything_state AS customer_state, + state AS customer_state, sum_sum_sale_price AS total_sales FROM _t1 WHERE diff --git a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql index 669018a02..9a3119405 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql @@ -1,37 +1,37 @@ -WITH _t2 AS ( +WITH _s0 AS ( SELECT DATE( - sales.sale_date, + sale_date, 'start of month', '-' || CAST(( ( - CAST(STRFTIME('%m', DATETIME(sales.sale_date)) AS INTEGER) - 1 + CAST(STRFTIME('%m', DATETIME(sale_date)) AS INTEGER) - 1 ) % 3 ) AS TEXT) || ' months' ) AS quarter, - MAX(customers.state) AS anything_state, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.customers AS customers - ON customers._id = sales.customer_id + customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - CAST(STRFTIME('%Y', sales.sale_date) AS INTEGER) = 2023 + CAST(STRFTIME('%Y', sale_date) AS INTEGER) = 2023 GROUP BY - sales.customer_id, - 1 + 1, + 2 ), _t1 AS ( SELECT - anything_state, - quarter, - SUM(sum_sale_price) AS sum_sum_sale_price - FROM _t2 + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id GROUP BY 1, 2 ) SELECT quarter, - anything_state AS customer_state, + state AS customer_state, 
sum_sum_sale_price AS total_sales FROM _t1 WHERE diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql index 9c3b1c7d9..3ae3d4eba 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql @@ -10,15 +10,6 @@ WITH _s0 AS ( pr_id, pr_release FROM main.products -), _t1 AS ( - SELECT - COUNT(*) AS n_rows_1, - ANY_VALUE(_s1.pr_release) AS pr_release - FROM main.devices AS devices - JOIN _s1 AS _s1 - ON _s1.pr_id = devices.de_product_id - GROUP BY - devices.de_product_id ), _s6 AS ( SELECT EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) AS year_pr_release, diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql index 5de1ba7f3..473886a2e 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql @@ -10,15 +10,6 @@ WITH _s0 AS ( pr_id, pr_release FROM main.products -), _t1 AS ( - SELECT - COUNT(*) AS n_rows_1, - MAX(_s1.pr_release) AS pr_release - FROM main.devices AS devices - JOIN _s1 AS _s1 - ON _s1.pr_id = devices.de_product_id - GROUP BY - devices.de_product_id ), _s6 AS ( SELECT CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) AS year_pr_release, From f130bc3f7bd89c4a8e6a1db9e63f22ea0838037d Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 18 Sep 2025 13:58:22 -0400 Subject: [PATCH 096/143] WIP --- pydough/conversion/join_agg_transpose.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pydough/conversion/join_agg_transpose.py b/pydough/conversion/join_agg_transpose.py index 6f32751b1..0a46234ba 100644 --- a/pydough/conversion/join_agg_transpose.py +++ b/pydough/conversion/join_agg_transpose.py @@ 
-14,6 +14,7 @@ Join, JoinCardinality, JoinType, + Project, RelationalExpression, RelationalNode, RelationalRoot, @@ -139,6 +140,10 @@ def join_aggregate_transpose( ): return None + # A mapping that will be used to map every expression with regards to + # the original join looking at its input expressions to what the + # expression will be in the output columns of the new aggregate + new_join_columns: dict[str, RelationalExpression] = {} new_aggregate_aggs: dict[str, CallExpression] = {} new_aggregate_keys: dict[str, RelationalExpression] = {} @@ -150,6 +155,10 @@ def join_aggregate_transpose( [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] ) + project_columns: dict[str, RelationalExpression] = {} + + assert False + # TODO: FINISH THIS return None @@ -167,7 +176,7 @@ def join_aggregate_transpose( new_join, new_aggregate_keys, new_aggregate_aggs ) - return new_aggregate + return Project(new_aggregate, project_columns) def pull_joins_after_aggregates(node: RelationalRoot) -> RelationalNode: From 401c1bcb8f5fcc88631d744711a0d4ec456a18c3 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 6 Oct 2025 15:17:37 -0400 Subject: [PATCH 097/143] Resolving conflicts --- tests/test_plan_refsols/cryptbank_agg_03.txt | 9 ----- tests/test_plan_refsols/cryptbank_agg_04.txt | 5 --- tests/test_plan_refsols/cryptbank_agg_05.txt | 10 ----- .../cryptbank_analysis_01.txt | 22 ----------- .../cryptbank_analysis_02.txt | 22 ----------- .../cryptbank_analysis_03.txt | 39 ------------------- .../cryptbank_analysis_04.txt | 14 ------- .../cryptbank_filter_count_11.txt | 14 ------- .../cryptbank_filter_count_12.txt | 5 --- .../cryptbank_filter_count_13.txt | 5 --- .../cryptbank_filter_count_15.txt | 10 ----- .../cryptbank_filter_count_16.txt | 10 ----- .../cryptbank_filter_count_28.txt | 7 ---- .../cryptbank_general_join_01.txt | 16 -------- .../cryptbank_general_join_02.txt | 11 ------ 15 files changed, 199 deletions(-) delete mode 100644 
tests/test_plan_refsols/cryptbank_agg_03.txt delete mode 100644 tests/test_plan_refsols/cryptbank_agg_04.txt delete mode 100644 tests/test_plan_refsols/cryptbank_agg_05.txt delete mode 100644 tests/test_plan_refsols/cryptbank_analysis_01.txt delete mode 100644 tests/test_plan_refsols/cryptbank_analysis_02.txt delete mode 100644 tests/test_plan_refsols/cryptbank_analysis_03.txt delete mode 100644 tests/test_plan_refsols/cryptbank_analysis_04.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_11.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_12.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_13.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_15.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_16.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_28.txt delete mode 100644 tests/test_plan_refsols/cryptbank_general_join_01.txt delete mode 100644 tests/test_plan_refsols/cryptbank_general_join_02.txt diff --git a/tests/test_plan_refsols/cryptbank_agg_03.txt b/tests/test_plan_refsols/cryptbank_agg_03.txt deleted file mode 100644 index 464b8634c..000000000 --- a/tests/test_plan_refsols/cryptbank_agg_03.txt +++ /dev/null @@ -1,9 +0,0 @@ -ROOT(columns=[('account_type', a_type), ('balance', a_balance), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname))], orderings=[]) - FILTER(condition=RANKING(args=[], partition=[a_type], order=[(a_balance):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_type': a_type, 'c_fname': c_fname, 'c_lname': c_lname}) -<<<<<<< HEAD - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) -======= - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_type': a_type}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_agg_04.txt b/tests/test_plan_refsols/cryptbank_agg_04.txt deleted file mode 100644 index 8d096df95..000000000 --- a/tests/test_plan_refsols/cryptbank_agg_04.txt +++ /dev/null @@ -1,5 +0,0 @@ -ROOT(columns=[('branch_key', b_key), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[]) - JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'b_key': t0.b_key, 'sum_a_balance': t1.sum_a_balance}) - SCAN(table=CRBNK.BRANCHES, columns={'b_key': b_key}) - AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_a_balance': SUM(a_balance)}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey}) diff --git a/tests/test_plan_refsols/cryptbank_agg_05.txt b/tests/test_plan_refsols/cryptbank_agg_05.txt deleted file mode 100644 index f7672b643..000000000 --- a/tests/test_plan_refsols/cryptbank_agg_05.txt +++ /dev/null @@ -1,10 +0,0 @@ -ROOT(columns=[('avg_secs', ROUND(avg_expr, 2:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'avg_expr': AVG(DATEDIFF('seconds':string, a_open_ts, min_t_ts))}) -<<<<<<< HEAD - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'a_open_ts': t0.a_open_ts, 'min_t_ts': t1.min_t_ts}) -======= - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_open_ts': t0.a_open_ts, 'min_t_ts': 
t1.min_t_ts}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) - AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'min_t_ts': MIN(t_ts)}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount, 't_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_01.txt b/tests/test_plan_refsols/cryptbank_analysis_01.txt deleted file mode 100644 index c23ce20f3..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_01.txt +++ /dev/null @@ -1,22 +0,0 @@ -ROOT(columns=[('key', c_key), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('first_sends', DEFAULT_TO(sum_t_amount, 0:numeric))], orderings=[(DEFAULT_TO(sum_t_amount, 0:numeric)):desc_last, (c_key):asc_first], limit=3:numeric) -<<<<<<< HEAD - JOIN(condition=t0.c_key == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) -======= - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_02.txt b/tests/test_plan_refsols/cryptbank_analysis_02.txt deleted file mode 100644 index 3525b5c77..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_02.txt +++ /dev/null @@ -1,22 +0,0 @@ -ROOT(columns=[('key', 
c_key), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('first_recvs', DEFAULT_TO(sum_t_amount, 0:numeric))], orderings=[(DEFAULT_TO(sum_t_amount, 0:numeric)):desc_last, (c_key):asc_first], limit=3:numeric) -<<<<<<< HEAD - JOIN(condition=t0.c_key == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_destaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) -======= - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_destaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_03.txt b/tests/test_plan_refsols/cryptbank_analysis_03.txt deleted file mode 100644 index 363340648..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_03.txt +++ /dev/null @@ -1,39 +0,0 @@ -ROOT(columns=[('key', c_key), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('first_sends', DEFAULT_TO(agg_1, 0:numeric)), ('first_recvs', DEFAULT_TO(sum_t_amount, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) + DEFAULT_TO(sum_t_amount, 0:numeric)):desc_last, (c_key):asc_first], limit=3:numeric) -<<<<<<< HEAD - JOIN(condition=t0.c_key == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.sum_t_amount, 'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - JOIN(condition=t0.c_key == t1.a_custkey, 
type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) -======= - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.sum_t_amount, 'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == t1.a_key, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) -<<<<<<< HEAD - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_destaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) -======= - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_destaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_04.txt b/tests/test_plan_refsols/cryptbank_analysis_04.txt deleted file mode 100644 index e8220860e..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_04.txt +++ /dev/null @@ -1,14 +0,0 @@ -ROOT(columns=[('key', a_key), ('cust_name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('n_trans', n_rows)], orderings=[(a_key):asc_first]) -<<<<<<< HEAD - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 'n_rows': t1.n_rows}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) -======= - JOIN(condition=t0.a_key == 
t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 'n_rows': t1.n_rows}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - FILTER(condition=MONOTONIC(1980:numeric, YEAR(c_birthday), 1985:numeric), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday, 'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'n_rows': COUNT()}) - FILTER(condition=t_amount > 9000.0:numeric, columns={'t_sourceaccount': t_sourceaccount}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_sourceaccount': t_sourceaccount}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_11.txt b/tests/test_plan_refsols/cryptbank_filter_count_11.txt deleted file mode 100644 index 40a01c208..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_11.txt +++ /dev/null @@ -1,14 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) -<<<<<<< HEAD - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key}) -======= - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount}) - 
JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - FILTER(condition=c_fname == 'alice':string, columns={'c_key': c_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_12.txt b/tests/test_plan_refsols/cryptbank_filter_count_12.txt deleted file mode 100644 index 03bc19679..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_12.txt +++ /dev/null @@ -1,5 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=YEAR(t0.t_ts) == YEAR(t1.a_open_ts) & t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_13.txt b/tests/test_plan_refsols/cryptbank_filter_count_13.txt deleted file mode 100644 index 836560846..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_13.txt +++ /dev/null @@ -1,5 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.t_ts < DATETIME(t1.a_open_ts, '+2 years':string) & t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_destaccount': t_destaccount, 't_ts': t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_15.txt b/tests/test_plan_refsols/cryptbank_filter_count_15.txt deleted file mode 100644 index b6667b78a..000000000 --- 
a/tests/test_plan_refsols/cryptbank_filter_count_15.txt +++ /dev/null @@ -1,10 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) -<<<<<<< HEAD - JOIN(condition=t0.c_key == t1.a_custkey, type=SEMI, columns={}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_key': c_key}) - FILTER(condition=a_type == 'retirement':string, columns={'a_custkey': a_custkey}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_type': a_type}) -======= - SCAN(table=CRBNK.CUSTOMERS, columns={}) ->>>>>>> main diff --git a/tests/test_plan_refsols/cryptbank_filter_count_16.txt b/tests/test_plan_refsols/cryptbank_filter_count_16.txt deleted file mode 100644 index 373780af6..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_16.txt +++ /dev/null @@ -1,10 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) -<<<<<<< HEAD - JOIN(condition=t0.c_key == t1.a_custkey, type=SEMI, columns={}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_key': c_key}) - FILTER(condition=a_type != 'checking':string & a_type != 'savings':string, columns={'a_custkey': a_custkey}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_type': a_type}) -======= - SCAN(table=CRBNK.CUSTOMERS, columns={}) ->>>>>>> main diff --git a/tests/test_plan_refsols/cryptbank_filter_count_28.txt b/tests/test_plan_refsols/cryptbank_filter_count_28.txt deleted file mode 100644 index 4e8ab891b..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_28.txt +++ /dev/null @@ -1,7 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - FILTER(condition=YEAR(a_open_ts) < 2020:numeric & a_balance >= 5000:numeric & a_type == 'retirement':string | a_type == 'savings':string, columns={'a_custkey': a_custkey}) - 
SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_open_ts': a_open_ts, 'a_type': a_type}) - FILTER(condition=CONTAINS(c_email, 'outlook':string) | CONTAINS(c_email, 'gmail':string), columns={'c_key': c_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01.txt b/tests/test_plan_refsols/cryptbank_general_join_01.txt deleted file mode 100644 index 6cdffeadb..000000000 --- a/tests/test_plan_refsols/cryptbank_general_join_01.txt +++ /dev/null @@ -1,16 +0,0 @@ -ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', sum_n_rows)], orderings=[]) - AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) -<<<<<<< HEAD - JOIN(condition=t0.b_key == t1.b_key & t0.c_key == t1.c_key, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) -======= - JOIN(condition=t0.b_key == t1.b_key & t0.c_key == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) ->>>>>>> main - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(t1.c_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - AGGREGATE(keys={'b_key': b_key, 'c_key': c_key}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_key == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(t1.c_addr, 
-8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_02.txt b/tests/test_plan_refsols/cryptbank_general_join_02.txt deleted file mode 100644 index 4a22534b7..000000000 --- a/tests/test_plan_refsols/cryptbank_general_join_02.txt +++ /dev/null @@ -1,11 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) -<<<<<<< HEAD - JOIN(condition=t0.a_custkey == t1.c_key & t0.a_branchkey == t1.b_key, type=SEMI, columns={}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) - JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(t0.c_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) -======= - SCAN(table=CRBNK.ACCOUNTS, columns={}) ->>>>>>> main From b987f30d3b2d1d4261d0f8e5b89a08a5f17a9009 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 00:01:01 -0400 Subject: [PATCH 098/143] Resolving conflicts --- .../conversion/aggregate_join_transpose.py | 187 ++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 pydough/conversion/aggregate_join_transpose.py diff --git a/pydough/conversion/aggregate_join_transpose.py b/pydough/conversion/aggregate_join_transpose.py new file mode 100644 index 000000000..27c29b4ea --- /dev/null +++ b/pydough/conversion/aggregate_join_transpose.py 
@@ -0,0 +1,187 @@ +""" """ + +__all__ = ["pull_aggregates_above_joins"] + + +from collections.abc import Iterable + +import pydough.pydough_operators as pydop +from pydough.relational import ( + Aggregate, + CallExpression, + ColumnReference, + ColumnReferenceFinder, + Join, + JoinCardinality, + JoinType, + Project, + RelationalExpression, + RelationalNode, + RelationalRoot, + RelationalShuttle, +) + + +class JoinAggregateTransposeShuttle(RelationalShuttle): + """ + TODO + """ + + def __init__(self): + self.finder: ColumnReferenceFinder = ColumnReferenceFinder() + + def reset(self): + self.finder.reset() + + def visit_join(self, node: Join) -> RelationalNode: + result: RelationalNode | None = None + + # Attempt the transpose where the left input is an Aggregate. If it + # succeeded, use that as the result and recursively transform its + # inputs. + if isinstance(node.inputs[0], Aggregate): + result = self.join_aggregate_transpose(node, node.inputs[0], True) + if result is not None: + return self.generic_visit_inputs(result) + + # If the attempt failed, then attempt the transpose where the right + # input is an Aggregate. If this attempt succeeded, use that as the + # result and recursively transform its inputs. + if isinstance(node.inputs[1], Aggregate): + result = self.join_aggregate_transpose(node, node.inputs[1], False) + if result is not None: + return self.generic_visit_inputs(result) + + # If this attempt failed, fall back to the regular implementation. + return super().visit_join(node) + + def generate_name(self, base: str, used_names: Iterable[str]) -> str: + """ + Generates a new name for a column based on the base name and the existing + columns in the join. This is used to ensure that the new column names are + unique and do not conflict with existing names. 
+ """ + if base not in used_names: + return base + i = 0 + while True: + name = f"{base}_{i}" + if name not in used_names: + return name + i += 1 + + def join_aggregate_transpose( + self, join: Join, aggregate: Aggregate, is_left: bool + ) -> RelationalNode | None: + """ + Transposes a Join above an Aggregate into an Aggregate above a Join, + when possible and it would be better for performance to use the join + first to filter some of the rows before aggregating. + + Args: + `join`: the Join node above the Aggregate. + `aggregate`: the Aggregate node that is the left input to the Join. + `is_left`: whether the Aggregate is the left input to the Join + (True) or the right input (False). + + Returns: + The new RelationalNode tree with the Join and Aggregate transposed, + or None if the transpose is not possible. + """ + # Verify that the join is an inner, left, or semi-join, and that the + # join cardinality is singular (unless the aggregations are not affected + # by a change in cardinality). + aggs_allow_plural: bool = all( + call.op in (pydop.MIN, pydop.MAX, pydop.ANYTHING, pydop.NDISTINCT) + for call in aggregate.aggregations.values() + ) + + # The cardinality with regards to the input being considered must be + # singular (unless the aggregations allow plural), and must be + # filtering (since the point of joining before aggregation is to reduce + # the number of rows to aggregate). + cardinality: JoinCardinality = ( + join.cardinality if is_left else join.reverse_cardinality + ) + + # Verify the cardinality meets the specified criteria, and that the join + # type is INNER/SEMI (since LEFT would not be filtering), where SEMI is + # only allowed if the aggregation is on the left. + if not ( + ( + (join.join_type == JoinType.INNER) + or (join.join_type == JoinType.SEMI and is_left) + ) + and cardinality.filters + and (cardinality.singular or aggs_allow_plural) + ): + return None + + # The alias of the input to the join that corresponds to the + # aggregate. 
+ desired_alias: str | None = ( + join.default_input_aliases[0] if is_left else join.default_input_aliases[1] + ) + + # Find all of the columns used in the join condition that come from the + # aggregate side of the join + self.finder.reset() + join.condition.accept(self.finder) + agg_condition_columns: set[ColumnReference] = { + col + for col in self.finder.get_column_references() + if col.input_name == desired_alias + } + + # Verify ALL of the condition columns from that side of the join are + # in the aggregate keys. + if len(agg_condition_columns) == 0 or any( + col.name not in aggregate.keys for col in agg_condition_columns + ): + return None + + # A mapping that will be used to map every expression with regards to + # the original join looking at its input expressions to what the + # expression will be in the output columns of the new aggregate + + new_join_columns: dict[str, RelationalExpression] = {} + new_aggregate_aggs: dict[str, CallExpression] = {} + new_aggregate_keys: dict[str, RelationalExpression] = {} + + new_condition: RelationalExpression = join.condition + agg_input: RelationalNode = aggregate.inputs[0] + non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0] + new_join_inputs: list[RelationalNode] = ( + [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] + ) + + project_columns: dict[str, RelationalExpression] = {} + + # TODO: FINISH THIS + return None + + assert False + + new_join: Join = Join( + new_join_inputs, + new_condition, + join.join_type, + new_join_columns, + join.cardinality, + join.reverse_cardinality, + join.correl_name, + ) + + new_aggregate: Aggregate = Aggregate( + new_join, new_aggregate_keys, new_aggregate_aggs + ) + + return Project(new_aggregate, project_columns) + + +def pull_aggregates_above_joins(node: RelationalRoot) -> RelationalNode: + """ + TODO + """ + shuttle: JoinAggregateTransposeShuttle = JoinAggregateTransposeShuttle() + return node.accept_shuttle(shuttle) From 
d674b0d938afcdd5484c5ba60cf4ff1c8aeda19e Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 00:29:37 -0400 Subject: [PATCH 099/143] Adjusting reverse cardinality edge case --- .../conversion/aggregate_join_transpose.py | 38 ++++++++++++++++--- pydough/conversion/hybrid_tree.py | 14 +++++-- pydough/conversion/relational_converter.py | 6 +++ pydough/relational/relational_nodes/join.py | 17 +++++++++ tests/test_plan_refsols/common_prefix_an.txt | 2 +- tests/test_plan_refsols/common_prefix_v.txt | 2 +- .../customer_most_recent_orders.txt | 2 +- .../epoch_users_most_cold_war_searches.txt | 2 +- tests/test_plan_refsols/semi_aggregate.txt | 2 +- tests/test_plan_refsols/semi_singular.txt | 2 +- .../test_plan_refsols/supplier_best_part.txt | 2 +- tests/test_plan_refsols/tpch_q15.txt | 8 ++-- tests/test_sql_refsols/tpch_q15_ansi.sql | 25 ++++++------ tests/test_sql_refsols/tpch_q15_mysql.sql | 25 ++++++------ tests/test_sql_refsols/tpch_q15_postgres.sql | 25 ++++++------ tests/test_sql_refsols/tpch_q15_snowflake.sql | 25 ++++++------ tests/test_sql_refsols/tpch_q15_sqlite.sql | 25 ++++++------ 17 files changed, 130 insertions(+), 92 deletions(-) diff --git a/pydough/conversion/aggregate_join_transpose.py b/pydough/conversion/aggregate_join_transpose.py index 27c29b4ea..3c9b00719 100644 --- a/pydough/conversion/aggregate_join_transpose.py +++ b/pydough/conversion/aggregate_join_transpose.py @@ -20,6 +20,7 @@ RelationalRoot, RelationalShuttle, ) +from pydough.relational.rel_util import add_input_name, apply_substitution class JoinAggregateTransposeShuttle(RelationalShuttle): @@ -148,30 +149,55 @@ def join_aggregate_transpose( new_aggregate_aggs: dict[str, CallExpression] = {} new_aggregate_keys: dict[str, RelationalExpression] = {} - new_condition: RelationalExpression = join.condition agg_input: RelationalNode = aggregate.inputs[0] non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0] new_join_inputs: list[RelationalNode] = ( 
[agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] ) + join_reverse_map: dict[RelationalExpression, set[str]] = {} + for col_name, expr in join.columns.items(): + if expr not in join_reverse_map: + join_reverse_map[expr] = set() + join_reverse_map[expr].add(col_name) + project_columns: dict[str, RelationalExpression] = {} - # TODO: FINISH THIS - return None + new_cardinality: JoinCardinality = join.cardinality + new_reverse_cardinality: JoinCardinality = join.reverse_cardinality + if is_left: + new_reverse_cardinality = new_reverse_cardinality.add_plural() + else: + new_cardinality = new_cardinality.add_plural() - assert False + agg_alias: str | None = ( + join.default_input_aliases[0] if is_left else join.default_input_aliases[1] + ) + (join.default_input_aliases[1] if is_left else join.default_input_aliases[0]) + agg_key_substitution: dict[RelationalExpression, RelationalExpression] = {} + for key_name, key_expr in aggregate.keys.items(): + sided_key: RelationalExpression = ColumnReference( + key_name, key_expr.data_type, agg_alias + ) + agg_key_substitution[sided_key] = add_input_name(key_expr, agg_alias) + new_condition: RelationalExpression = apply_substitution( + join.condition, agg_key_substitution, {} + ) new_join: Join = Join( new_join_inputs, new_condition, join.join_type, new_join_columns, - join.cardinality, - join.reverse_cardinality, + new_cardinality, + new_reverse_cardinality, join.correl_name, ) + return None + breakpoint() + assert False + new_aggregate: Aggregate = Aggregate( new_join, new_aggregate_keys, new_aggregate_aggs ) diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index 7c50c5e6b..8f4306c57 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -587,6 +587,16 @@ def add_child( self ) + # Augment the reverse cardinality if the parent does not always exist. 
+ if not reverse_cardinality.filters: + if len(self.pipeline) == 1 and isinstance( + self.pipeline[0], HybridPartition + ): + if self.parent is not None and not self.parent.always_exists(): + reverse_cardinality = reverse_cardinality.add_filter() + elif not self.always_exists(): + reverse_cardinality = reverse_cardinality.add_filter() + # Create and insert the new child connection. new_child_idx = len(self.children) connection: HybridConnection = HybridConnection( @@ -600,10 +610,6 @@ def add_child( ) self._children.append(connection) - # Augment the reverse cardinality if the parent does not always exist. - if (not reverse_cardinality.filters) and (not self.always_exists()): - connection.reverse_cardinality = reverse_cardinality.add_filter() - # If an operation prevents the child's presence from directly # filtering the current level, update its connection type to be either # SINGULAR or AGGREGATION, then insert a similar COUNT(*)/PRESENT diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 5a7452d83..5a50cb2bf 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1546,6 +1546,9 @@ def optimize_relational_tree( pruner: ColumnPruner = ColumnPruner() root = pruner.prune_unused_columns(root) + print() + print(root.to_tree_string()) + # Bubble up names from the leaf nodes to further encourage simpler naming # without aliases, and also to delete duplicate columns where possible. # This is done early to maximize the chances that a nicer name will be used @@ -1654,6 +1657,9 @@ def convert_ast_to_relational( hybrid_translator: HybridTranslator = HybridTranslator(session) hybrid: HybridTree = hybrid_translator.convert_qdag_to_hybrid(node) + print() + print(hybrid) + # Then, invoke relational conversion procedure. The first element in the # returned list is the final relational tree. 
output: TranslationOutput = rel_translator.rel_translation( diff --git a/pydough/relational/relational_nodes/join.py b/pydough/relational/relational_nodes/join.py index acba1ce33..c1565c3f8 100644 --- a/pydough/relational/relational_nodes/join.py +++ b/pydough/relational/relational_nodes/join.py @@ -90,6 +90,23 @@ def remove_filter(self) -> "JoinCardinality": else: return self + def add_plural(self) -> "JoinCardinality": + """ + Returns a new JoinCardinality referring to the current value but with + plural cardinality added. + """ + if self in (JoinCardinality.SINGULAR_FILTER, JoinCardinality.UNKNOWN_FILTER): + return JoinCardinality.PLURAL_FILTER + elif self in (JoinCardinality.SINGULAR_ACCESS, JoinCardinality.UNKNOWN_ACCESS): + return JoinCardinality.PLURAL_ACCESS + elif self in ( + JoinCardinality.SINGULAR_UNKNOWN, + JoinCardinality.UNKNOWN_UNKNOWN, + ): + return JoinCardinality.PLURAL_UNKNOWN + else: + return self + @property def accesses(self) -> bool: """ diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 98beec996..6a01e42e5 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -2,7 +2,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) - 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_v.txt b/tests/test_plan_refsols/common_prefix_v.txt index 72dfc31e5..1d607394a 100644 --- a/tests/test_plan_refsols/common_prefix_v.txt +++ b/tests/test_plan_refsols/common_prefix_v.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt 
b/tests/test_plan_refsols/customer_most_recent_orders.txt index 0fe2509df..5d85af002 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('total_recent_value', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last], limit=3:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt index 9cc6d3432..9e78c8c06 100644 --- a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt +++ b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt @@ -1,5 +1,5 @@ ROOT(columns=[('user_name', user_name), ('n_cold_war_searches', n_rows)], orderings=[(n_rows):desc_last, (user_name):asc_first], limit=3:numeric) - JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_rows': t1.n_rows, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) AGGREGATE(keys={'anything_search_user_id': anything_search_user_id}, aggregations={'n_rows': COUNT()}) AGGREGATE(keys={'search_id': search_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id)}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt b/tests/test_plan_refsols/semi_aggregate.txt index 62c91f92a..18f353c33 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/semi_singular.txt b/tests/test_plan_refsols/semi_singular.txt index 675513cc3..e7c11c269 100644 --- a/tests/test_plan_refsols/semi_singular.txt +++ b/tests/test_plan_refsols/semi_singular.txt @@ -1,5 +1,5 @@ 
ROOT(columns=[('name', n_name), ('region_name', r_name)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index f9b08525e..a63790075 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -6,7 +6,7 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index ead8b6cda..b0f62ba71 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -2,11 +2,9 @@ ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_addr JOIN(condition=DEFAULT_TO(t1.sum_expr, 0:numeric) == t0.max_total_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr': t1.sum_expr}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_total_revenue': t0.max_total_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'max_total_revenue': MAX(DEFAULT_TO(sum_expr, 0:numeric))}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sum_expr': t1.sum_expr}) - SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr': SUM(l_extendedprice * 1:numeric - l_discount)}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, 
columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr': SUM(l_extendedprice * 1:numeric - l_discount)}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr': SUM(l_extendedprice * 1:numeric - l_discount)}) FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) diff --git a/tests/test_sql_refsols/tpch_q15_ansi.sql b/tests/test_sql_refsols/tpch_q15_ansi.sql index cc9bc9485..cc48d214f 100644 --- a/tests/test_sql_refsols/tpch_q15_ansi.sql +++ b/tests/test_sql_refsols/tpch_q15_ansi.sql @@ -8,22 +8,19 @@ WITH _t3 AS ( WHERE l_shipdate < CAST('1996-04-01' AS DATE) AND l_shipdate >= CAST('1996-01-01' AS DATE) -), _s1 AS ( +), _t1 AS ( SELECT - l_suppkey, SUM(l_extendedprice * ( 1 - l_discount )) AS sum_expr FROM _t3 GROUP BY - 1 -), _s2 AS ( + l_suppkey +), _s0 AS ( SELECT - MAX(COALESCE(_s1.sum_expr, 0)) AS max_total_revenue - FROM tpch.supplier AS supplier - JOIN _s1 AS _s1 - ON _s1.l_suppkey = supplier.s_suppkey -), _s5 AS ( + MAX(COALESCE(sum_expr, 0)) AS max_total_revenue + FROM _t1 +), _s3 AS ( SELECT l_suppkey, SUM(l_extendedprice * ( @@ -38,11 +35,11 @@ SELECT 
supplier.s_name AS S_NAME, supplier.s_address AS S_ADDRESS, supplier.s_phone AS S_PHONE, - COALESCE(_s5.sum_expr, 0) AS TOTAL_REVENUE -FROM _s2 AS _s2 + COALESCE(_s3.sum_expr, 0) AS TOTAL_REVENUE +FROM _s0 AS _s0 CROSS JOIN tpch.supplier AS supplier -JOIN _s5 AS _s5 - ON _s2.max_total_revenue = COALESCE(_s5.sum_expr, 0) - AND _s5.l_suppkey = supplier.s_suppkey +JOIN _s3 AS _s3 + ON _s0.max_total_revenue = COALESCE(_s3.sum_expr, 0) + AND _s3.l_suppkey = supplier.s_suppkey ORDER BY 1 diff --git a/tests/test_sql_refsols/tpch_q15_mysql.sql b/tests/test_sql_refsols/tpch_q15_mysql.sql index da971edde..4ee343695 100644 --- a/tests/test_sql_refsols/tpch_q15_mysql.sql +++ b/tests/test_sql_refsols/tpch_q15_mysql.sql @@ -8,22 +8,19 @@ WITH _t3 AS ( WHERE l_shipdate < CAST('1996-04-01' AS DATE) AND l_shipdate >= CAST('1996-01-01' AS DATE) -), _s1 AS ( +), _t1 AS ( SELECT - l_suppkey, SUM(l_extendedprice * ( 1 - l_discount )) AS sum_expr FROM _t3 GROUP BY - 1 -), _s2 AS ( + l_suppkey +), _s0 AS ( SELECT - MAX(COALESCE(_s1.sum_expr, 0)) AS max_total_revenue - FROM tpch.SUPPLIER AS SUPPLIER - JOIN _s1 AS _s1 - ON SUPPLIER.s_suppkey = _s1.l_suppkey -), _s5 AS ( + MAX(COALESCE(sum_expr, 0)) AS max_total_revenue + FROM _t1 +), _s3 AS ( SELECT l_suppkey, SUM(l_extendedprice * ( @@ -38,11 +35,11 @@ SELECT SUPPLIER.s_name AS S_NAME, SUPPLIER.s_address AS S_ADDRESS, SUPPLIER.s_phone AS S_PHONE, - COALESCE(_s5.sum_expr, 0) AS TOTAL_REVENUE -FROM _s2 AS _s2 + COALESCE(_s3.sum_expr, 0) AS TOTAL_REVENUE +FROM _s0 AS _s0 CROSS JOIN tpch.SUPPLIER AS SUPPLIER -JOIN _s5 AS _s5 - ON SUPPLIER.s_suppkey = _s5.l_suppkey - AND _s2.max_total_revenue = COALESCE(_s5.sum_expr, 0) +JOIN _s3 AS _s3 + ON SUPPLIER.s_suppkey = _s3.l_suppkey + AND _s0.max_total_revenue = COALESCE(_s3.sum_expr, 0) ORDER BY 1 diff --git a/tests/test_sql_refsols/tpch_q15_postgres.sql b/tests/test_sql_refsols/tpch_q15_postgres.sql index 8537c8662..5481d24fd 100644 --- a/tests/test_sql_refsols/tpch_q15_postgres.sql +++ 
b/tests/test_sql_refsols/tpch_q15_postgres.sql @@ -8,22 +8,19 @@ WITH _t3 AS ( WHERE l_shipdate < CAST('1996-04-01' AS DATE) AND l_shipdate >= CAST('1996-01-01' AS DATE) -), _s1 AS ( +), _t1 AS ( SELECT - l_suppkey, SUM(l_extendedprice * ( 1 - l_discount )) AS sum_expr FROM _t3 GROUP BY - 1 -), _s2 AS ( + l_suppkey +), _s0 AS ( SELECT - MAX(COALESCE(_s1.sum_expr, 0)) AS max_total_revenue - FROM tpch.supplier AS supplier - JOIN _s1 AS _s1 - ON _s1.l_suppkey = supplier.s_suppkey -), _s5 AS ( + MAX(COALESCE(sum_expr, 0)) AS max_total_revenue + FROM _t1 +), _s3 AS ( SELECT l_suppkey, SUM(l_extendedprice * ( @@ -38,11 +35,11 @@ SELECT supplier.s_name AS S_NAME, supplier.s_address AS S_ADDRESS, supplier.s_phone AS S_PHONE, - COALESCE(_s5.sum_expr, 0) AS TOTAL_REVENUE -FROM _s2 AS _s2 + COALESCE(_s3.sum_expr, 0) AS TOTAL_REVENUE +FROM _s0 AS _s0 CROSS JOIN tpch.supplier AS supplier -JOIN _s5 AS _s5 - ON _s2.max_total_revenue = COALESCE(_s5.sum_expr, 0) - AND _s5.l_suppkey = supplier.s_suppkey +JOIN _s3 AS _s3 + ON _s0.max_total_revenue = COALESCE(_s3.sum_expr, 0) + AND _s3.l_suppkey = supplier.s_suppkey ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q15_snowflake.sql b/tests/test_sql_refsols/tpch_q15_snowflake.sql index 8537c8662..5481d24fd 100644 --- a/tests/test_sql_refsols/tpch_q15_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q15_snowflake.sql @@ -8,22 +8,19 @@ WITH _t3 AS ( WHERE l_shipdate < CAST('1996-04-01' AS DATE) AND l_shipdate >= CAST('1996-01-01' AS DATE) -), _s1 AS ( +), _t1 AS ( SELECT - l_suppkey, SUM(l_extendedprice * ( 1 - l_discount )) AS sum_expr FROM _t3 GROUP BY - 1 -), _s2 AS ( + l_suppkey +), _s0 AS ( SELECT - MAX(COALESCE(_s1.sum_expr, 0)) AS max_total_revenue - FROM tpch.supplier AS supplier - JOIN _s1 AS _s1 - ON _s1.l_suppkey = supplier.s_suppkey -), _s5 AS ( + MAX(COALESCE(sum_expr, 0)) AS max_total_revenue + FROM _t1 +), _s3 AS ( SELECT l_suppkey, SUM(l_extendedprice * ( @@ -38,11 +35,11 @@ SELECT supplier.s_name AS S_NAME, 
supplier.s_address AS S_ADDRESS, supplier.s_phone AS S_PHONE, - COALESCE(_s5.sum_expr, 0) AS TOTAL_REVENUE -FROM _s2 AS _s2 + COALESCE(_s3.sum_expr, 0) AS TOTAL_REVENUE +FROM _s0 AS _s0 CROSS JOIN tpch.supplier AS supplier -JOIN _s5 AS _s5 - ON _s2.max_total_revenue = COALESCE(_s5.sum_expr, 0) - AND _s5.l_suppkey = supplier.s_suppkey +JOIN _s3 AS _s3 + ON _s0.max_total_revenue = COALESCE(_s3.sum_expr, 0) + AND _s3.l_suppkey = supplier.s_suppkey ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q15_sqlite.sql b/tests/test_sql_refsols/tpch_q15_sqlite.sql index 24d23d69e..fa41139ae 100644 --- a/tests/test_sql_refsols/tpch_q15_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q15_sqlite.sql @@ -7,22 +7,19 @@ WITH _t3 AS ( FROM tpch.lineitem WHERE l_shipdate < '1996-04-01' AND l_shipdate >= '1996-01-01' -), _s1 AS ( +), _t1 AS ( SELECT - l_suppkey, SUM(l_extendedprice * ( 1 - l_discount )) AS sum_expr FROM _t3 GROUP BY - 1 -), _s2 AS ( + l_suppkey +), _s0 AS ( SELECT - MAX(COALESCE(_s1.sum_expr, 0)) AS max_total_revenue - FROM tpch.supplier AS supplier - JOIN _s1 AS _s1 - ON _s1.l_suppkey = supplier.s_suppkey -), _s5 AS ( + MAX(COALESCE(sum_expr, 0)) AS max_total_revenue + FROM _t1 +), _s3 AS ( SELECT l_suppkey, SUM(l_extendedprice * ( @@ -37,11 +34,11 @@ SELECT supplier.s_name AS S_NAME, supplier.s_address AS S_ADDRESS, supplier.s_phone AS S_PHONE, - COALESCE(_s5.sum_expr, 0) AS TOTAL_REVENUE -FROM _s2 AS _s2 + COALESCE(_s3.sum_expr, 0) AS TOTAL_REVENUE +FROM _s0 AS _s0 CROSS JOIN tpch.supplier AS supplier -JOIN _s5 AS _s5 - ON _s2.max_total_revenue = COALESCE(_s5.sum_expr, 0) - AND _s5.l_suppkey = supplier.s_suppkey +JOIN _s3 AS _s3 + ON _s0.max_total_revenue = COALESCE(_s3.sum_expr, 0) + AND _s3.l_suppkey = supplier.s_suppkey ORDER BY 1 From 4206914df7acb4c30035e00252322028f68d133d Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 14:21:52 -0400 Subject: [PATCH 100/143] WIP transpose logic --- ...anspose.py => join_aggregate_transpose.py} | 
140 ++++++++++++++--- pydough/conversion/relational_converter.py | 8 +- .../avg_order_diff_per_customer.txt | 14 +- tests/test_plan_refsols/common_prefix_ad.txt | 18 +-- tests/test_plan_refsols/common_prefix_ae.txt | 14 +- tests/test_plan_refsols/common_prefix_al.txt | 30 ++-- tests/test_plan_refsols/common_prefix_n.txt | 14 +- tests/test_plan_refsols/common_prefix_t.txt | 22 +-- tests/test_plan_refsols/correl_13.txt | 8 +- tests/test_plan_refsols/correl_14.txt | 16 +- tests/test_plan_refsols/correl_15.txt | 22 +-- tests/test_plan_refsols/correl_6.txt | 8 +- .../cryptbank_analysis_04_raw.txt | 14 +- .../cryptbank_analysis_04_rewrite.txt | 14 +- .../customer_largest_order_deltas.txt | 10 +- .../month_year_sliding_windows.txt | 14 +- .../nation_acctbal_breakdown.txt | 14 +- .../parts_quantity_increase_95_96.txt | 24 +-- tests/test_plan_refsols/simple_var_std.txt | 10 +- tests/test_plan_refsols/tpch_q20.txt | 24 +-- .../window_filter_order_4.txt | 12 +- tests/test_sql_refsols/correl_13_sqlite.sql | 14 +- tests/test_sql_refsols/correl_14_sqlite.sql | 24 +-- tests/test_sql_refsols/correl_15_sqlite.sql | 29 ++-- .../cryptbank_analysis_04_raw_sqlite.sql | 35 ++--- .../cryptbank_analysis_04_rewrite_sqlite.sql | 35 ++--- .../defog_ewallet_adv10_ansi.sql | 21 +-- .../defog_ewallet_adv10_mysql.sql | 21 +-- .../defog_ewallet_adv10_postgres.sql | 21 +-- .../defog_ewallet_adv10_snowflake.sql | 21 +-- .../defog_ewallet_adv10_sqlite.sql | 21 +-- .../defog_ewallet_adv11_ansi.sql | 30 ++-- .../defog_ewallet_adv11_mysql.sql | 24 ++- .../defog_ewallet_adv11_postgres.sql | 30 ++-- .../defog_ewallet_adv11_snowflake.sql | 30 ++-- .../defog_ewallet_adv11_sqlite.sql | 38 ++--- .../defog_ewallet_adv16_ansi.sql | 22 +-- .../defog_ewallet_adv16_mysql.sql | 22 +-- .../defog_ewallet_adv16_postgres.sql | 22 +-- .../defog_ewallet_adv16_snowflake.sql | 22 +-- .../defog_ewallet_adv16_sqlite.sql | 22 +-- .../defog_ewallet_adv1_ansi.sql | 22 +-- .../defog_ewallet_adv1_mysql.sql | 22 +-- 
.../defog_ewallet_adv1_postgres.sql | 22 +-- .../defog_ewallet_adv1_snowflake.sql | 22 +-- .../defog_ewallet_adv1_sqlite.sql | 22 +-- .../defog_ewallet_adv3_ansi.sql | 18 +-- .../defog_ewallet_adv3_mysql.sql | 18 +-- .../defog_ewallet_adv3_postgres.sql | 18 +-- .../defog_ewallet_adv3_snowflake.sql | 18 +-- .../defog_ewallet_adv3_sqlite.sql | 18 +-- .../defog_ewallet_adv8_ansi.sql | 28 ++-- .../defog_ewallet_adv8_mysql.sql | 28 ++-- .../defog_ewallet_adv8_postgres.sql | 28 ++-- .../defog_ewallet_adv8_snowflake.sql | 28 ++-- .../defog_ewallet_adv8_sqlite.sql | 28 ++-- .../nation_acctbal_breakdown_ansi.sql | 30 ++-- .../nation_acctbal_breakdown_mysql.sql | 27 ++-- .../nation_acctbal_breakdown_postgres.sql | 36 ++--- .../nation_acctbal_breakdown_snowflake.sql | 30 ++-- .../nation_acctbal_breakdown_sqlite.sql | 27 ++-- .../test_sql_refsols/simple_var_std_ansi.sql | 31 ++-- .../test_sql_refsols/simple_var_std_mysql.sql | 134 +++++++++-------- .../simple_var_std_postgres.sql | 31 ++-- .../simple_var_std_snowflake.sql | 31 ++-- .../simple_var_std_sqlite.sql | 142 ++++++++++-------- tests/test_sql_refsols/tpch_q20_ansi.sql | 46 +++--- tests/test_sql_refsols/tpch_q20_mysql.sql | 46 +++--- tests/test_sql_refsols/tpch_q20_postgres.sql | 46 +++--- tests/test_sql_refsols/tpch_q20_snowflake.sql | 46 +++--- tests/test_sql_refsols/tpch_q20_sqlite.sql | 46 +++--- 71 files changed, 926 insertions(+), 1117 deletions(-) rename pydough/conversion/{aggregate_join_transpose.py => join_aggregate_transpose.py} (57%) diff --git a/pydough/conversion/aggregate_join_transpose.py b/pydough/conversion/join_aggregate_transpose.py similarity index 57% rename from pydough/conversion/aggregate_join_transpose.py rename to pydough/conversion/join_aggregate_transpose.py index 3c9b00719..c6a0504ef 100644 --- a/pydough/conversion/aggregate_join_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -20,7 +20,7 @@ RelationalRoot, RelationalShuttle, ) -from pydough.relational.rel_util import 
add_input_name, apply_substitution +from pydough.relational.rel_util import apply_substitution class JoinAggregateTransposeShuttle(RelationalShuttle): @@ -141,13 +141,21 @@ def join_aggregate_transpose( ): return None + agg_alias: str | None = ( + join.default_input_aliases[0] if is_left else join.default_input_aliases[1] + ) + non_agg_alias: str | None = ( + join.default_input_aliases[1] if is_left else join.default_input_aliases[0] + ) + # A mapping that will be used to map every expression with regards to # the original join looking at its input expressions to what the # expression will be in the output columns of the new aggregate new_join_columns: dict[str, RelationalExpression] = {} - new_aggregate_aggs: dict[str, CallExpression] = {} - new_aggregate_keys: dict[str, RelationalExpression] = {} + new_aggregate_keys: dict[str, RelationalExpression] = dict(aggregate.keys) + new_aggregate_aggs: dict[str, CallExpression] = dict(aggregate.aggregations) + new_agg_names: set[str] = set(aggregate.keys) | set(aggregate.aggregations) agg_input: RelationalNode = aggregate.inputs[0] non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0] @@ -155,13 +163,49 @@ def join_aggregate_transpose( [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] ) + # Ensure all of the aggregate keys are column references + key_columns: dict[str, RelationalExpression] = {} + if any( + not isinstance(expr, ColumnReference) for expr in aggregate.keys.values() + ): + agg_input_project: dict[str, RelationalExpression] = {} + for col_name, col_expr in agg_input.columns.items(): + agg_input_project[col_name] = ColumnReference( + col_name, col_expr.data_type + ) + for key_name, key_expr in aggregate.keys.items(): + if not isinstance(key_expr, ColumnReference): + new_key_name: str = self.generate_name(key_name, agg_input_project) + agg_input_project[new_key_name] = key_expr + key_columns[key_name] = ColumnReference( + new_key_name, key_expr.data_type + ) + else: + 
key_columns[key_name] = key_expr + agg_input = Project(agg_input, agg_input_project) + else: + key_columns.update(aggregate.keys) + join_reverse_map: dict[RelationalExpression, set[str]] = {} for col_name, expr in join.columns.items(): if expr not in join_reverse_map: join_reverse_map[expr] = set() join_reverse_map[expr].add(col_name) + agg_reverse_map: dict[RelationalExpression, set[str]] = {} + for name, expr in aggregate.columns.items(): + ref_expr: ColumnReference = ColumnReference(name, expr.data_type, agg_alias) + if ref_expr in join_reverse_map: + if expr not in agg_reverse_map: + agg_reverse_map[expr] = set() + agg_reverse_map[expr].update(join_reverse_map[ref_expr]) + project_columns: dict[str, RelationalExpression] = {} + for col_name, col_expr in aggregate.columns.items(): + for proj_name in agg_reverse_map.get(col_expr, []): + project_columns[proj_name] = ColumnReference( + col_name, col_expr.data_type + ) new_cardinality: JoinCardinality = join.cardinality new_reverse_cardinality: JoinCardinality = join.reverse_cardinality @@ -170,18 +214,68 @@ def join_aggregate_transpose( else: new_cardinality = new_cardinality.add_plural() - agg_alias: str | None = ( - join.default_input_aliases[0] if is_left else join.default_input_aliases[1] - ) - (join.default_input_aliases[1] if is_left else join.default_input_aliases[0]) - agg_key_substitution: dict[RelationalExpression, RelationalExpression] = {} - for key_name, key_expr in aggregate.keys.items(): - sided_key: RelationalExpression = ColumnReference( - key_name, key_expr.data_type, agg_alias + join_substitutions: dict[RelationalExpression, RelationalExpression] = {} + + agg_input_mapping: dict[str, str] = {} + for col_name, col_expr in agg_input.columns.items(): + new_join_columns[col_name] = ColumnReference( + col_name, col_expr.data_type, agg_alias + ) + agg_input_mapping[col_name] = col_name + + for col_name, col_expr in non_agg_input.columns.items(): + new_col_name: str = col_name + if new_col_name in 
new_join_columns: + new_col_name = self.generate_name(col_name, new_join_columns) + assert col_name not in new_join_columns + new_join_columns[new_col_name] = ColumnReference( + col_name, col_expr.data_type, non_agg_alias + ) + agg_input_mapping[col_name] = new_col_name + + agg_col_name: str = new_col_name + if agg_col_name in new_agg_names: + agg_col_name = self.generate_name(new_col_name, new_agg_names) + new_aggregate_aggs[agg_col_name] = CallExpression( + pydop.ANYTHING, + col_expr.data_type, + [ColumnReference(new_col_name, col_expr.data_type)], ) - agg_key_substitution[sided_key] = add_input_name(key_expr, agg_alias) + new_agg_names.add(agg_col_name) + non_ref: ColumnReference = ColumnReference( + col_name, col_expr.data_type, non_agg_alias + ) + for proj_name in join_reverse_map.get(non_ref, []): + project_columns[proj_name] = ColumnReference( + agg_col_name, col_expr.data_type + ) + + # TODO: POPULATE JOIN_SUBSTITUTIONS + + # TODO: + # Build join with every column from both inputs + # build mapping of each column in the two inputs to its new name + + # agg_key_substitution: dict[RelationalExpression, RelationalExpression] = {} + # for key_name, key_expr in aggregate.keys.items(): + # new_key_expr: RelationalExpression = add_input_name(key_expr, agg_alias) + # new_key_ref: ColumnReference = ColumnReference(key_name, key_expr.data_type) + # new_join_columns[key_name] = new_key_expr + # new_aggregate_keys[key_name] = new_key_ref + # if new_key_ref in agg_reverse_map: + # for col_name in agg_reverse_map[new_key_ref]: + # project_columns[col_name] = new_key_ref + # if isinstance(key_expr, ColumnReference) and key_expr.name == key_name: + # continue + # sided_key: RelationalExpression = ColumnReference( + # key_name, key_expr.data_type, agg_alias + # ) + # agg_key_substitution[sided_key] = new_key_expr + + # for agg_name, agg_call in aggregate.aggregations.items(): + new_condition: RelationalExpression = apply_substitution( - join.condition, agg_key_substitution, 
{} + join.condition, join_substitutions, {} ) new_join: Join = Join( @@ -194,15 +288,25 @@ def join_aggregate_transpose( join.correl_name, ) - return None - breakpoint() - assert False - new_aggregate: Aggregate = Aggregate( new_join, new_aggregate_keys, new_aggregate_aggs ) - return Project(new_aggregate, project_columns) + new_project: Project = Project(new_aggregate, project_columns) + + print() + print(join.to_tree_string()) + + print() + print(new_join.to_tree_string()) + + print() + print(new_project.to_tree_string()) + + # breakpoint() + # assert False + + return new_project def pull_aggregates_above_joins(node: RelationalRoot) -> RelationalNode: diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 5a50cb2bf..1c327a03b 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -55,7 +55,6 @@ from .agg_removal import remove_redundant_aggs from .agg_split import split_partial_aggregates -from .aggregate_join_transpose import pull_aggregates_above_joins from .column_bubbler import bubble_column_names from .filter_pushdown import push_filters from .hybrid_connection import ConnectionType, HybridConnection @@ -86,6 +85,7 @@ ) from .hybrid_translator import HybridTranslator from .hybrid_tree import HybridTree +from .join_aggregate_transpose import pull_aggregates_above_joins from .masking_shuttles import MaskLiteralComparisonShuttle from .merge_projects import merge_projects from .projection_pullup import pullup_projections @@ -1546,9 +1546,6 @@ def optimize_relational_tree( pruner: ColumnPruner = ColumnPruner() root = pruner.prune_unused_columns(root) - print() - print(root.to_tree_string()) - # Bubble up names from the leaf nodes to further encourage simpler naming # without aliases, and also to delete duplicate columns where possible. 
# This is done early to maximize the chances that a nicer name will be used @@ -1657,9 +1654,6 @@ def convert_ast_to_relational( hybrid_translator: HybridTranslator = HybridTranslator(session) hybrid: HybridTree = hybrid_translator.convert_qdag_to_hybrid(node) - print() - print(hybrid) - # Then, invoke relational conversion procedure. The first element in the # returned list is the final relational tree. output: TranslationOutput = rel_translator.rel_translation( diff --git a/tests/test_plan_refsols/avg_order_diff_per_customer.txt b/tests/test_plan_refsols/avg_order_diff_per_customer.txt index e6a8403ae..a3965cd56 100644 --- a/tests/test_plan_refsols/avg_order_diff_per_customer.txt +++ b/tests/test_plan_refsols/avg_order_diff_per_customer.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('name', c_name), ('avg_diff', avg_day_diff)], orderings=[(avg_day_diff):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_day_diff': t1.avg_day_diff, 'c_name': t0.c_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'avg_day_diff': AVG(day_diff)}) +ROOT(columns=[('name', anything_c_name), ('avg_diff', avg_day_diff)], orderings=[(avg_day_diff):desc_last], limit=5:numeric) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'avg_day_diff': AVG(day_diff)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'c_name': t0.c_name, 'day_diff': t1.day_diff, 'o_custkey': t1.o_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) PROJECT(columns={'day_diff': DATEDIFF('days':string, PREV(args=[o_orderdate], partition=[o_custkey], order=[(o_orderdate):asc_last]), o_orderdate), 'o_custkey': o_custkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 33cb44ec2..46f5f7a1e 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,15 +1,15 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) +ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('part_qty', anything_ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) + JOIN(condition=t0.s_suppkey == t1.anything_ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'anything_p_name': t1.anything_p_name, 'anything_ps_availqty': t1.anything_ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_ps_availqty': ANYTHING(ps_availqty), 
'anything_ps_suppkey': ANYTHING(ps_suppkey), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_ae.txt b/tests/test_plan_refsols/common_prefix_ae.txt index a81f03615..2cc3fd84c 100644 --- a/tests/test_plan_refsols/common_prefix_ae.txt +++ b/tests/test_plan_refsols/common_prefix_ae.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name', max_c_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'max_c_name': t1.max_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_name': MAX(c_name), 'n_rows': COUNT()}) +ROOT(columns=[('nation_name', anything_n_name), ('n_customers', n_rows), ('customer_name', max_c_name)], orderings=[(anything_n_name):asc_first]) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'max_c_name': MAX(c_name), 'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) 
JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index fda9b3df0..7f3114112 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,18 +1,18 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) - LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows_1}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': 
t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={}) +ROOT(columns=[('cust_key', anything_anything_c_custkey), ('n_orders', DEFAULT_TO(anything_anything_n_rows, 0:numeric)), ('n_no_tax_discount', anything_n_rows)], orderings=[(anything_anything_c_custkey):asc_first]) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_anything_c_custkey': ANYTHING(anything_c_custkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_n_rows': ANYTHING(n_rows)}) + JOIN(condition=t0.anything_c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_c_custkey': t0.anything_c_custkey, 'anything_n_rows': t0.anything_n_rows, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) + LIMIT(limit=10:numeric, columns={'anything_c_custkey': anything_c_custkey, 'anything_n_rows': anything_n_rows, 'n_rows': n_rows}, orderings=[(anything_c_custkey):asc_first]) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_custkey': ANYTHING(c_custkey), 'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': 
t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 653b6db49..3fbeb60a7 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_n_rows': sum_n_rows, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) +ROOT(columns=[('key', anything_o_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (anything_o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'anything_o_orderkey': anything_o_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 
'sum_n_rows': sum_n_rows, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.anything_o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'anything_o_orderkey': t0.anything_o_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'anything_o_orderkey': ANYTHING(o_orderkey), 'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'p_retailprice': t1.p_retailprice, 's_acctbal': t1.s_acctbal}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t1.n_rows, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 
'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index 1b9a18e15..e9c28a52e 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,12 +1,12 @@ -ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) +ROOT(columns=[('name', anything_c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, 
(anything_c_name):asc_first], limit=5:numeric) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_name': t0.c_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_quantity': t1.l_quantity, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/correl_13.txt b/tests/test_plan_refsols/correl_13.txt index 1bb720dea..781307ab7 100644 --- a/tests/test_plan_refsols/correl_13.txt +++ b/tests/test_plan_refsols/correl_13.txt @@ -1,9 +1,9 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) - FILTER(condition=s_nationkey <= 3:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t1.ps_suppkey}) + FILTER(condition=s_nationkey <= 3:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 1ad331485..32a6e4466 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,11 +1,11 @@ -ROOT(columns=[('n', ndistinct_s_suppkey)], orderings=[]) - AGGREGATE(keys={}, aggregations={'ndistinct_s_suppkey': NDISTINCT(s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.avg_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 
'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 's_suppkey': t0.s_suppkey}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) +ROOT(columns=[('n', ndistinct_anything_s_suppkey)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_anything_s_suppkey': NDISTINCT(anything_s_suppkey)}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.avg_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey}) + JOIN(condition=t0.anything_s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey, 'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_s_suppkey': ANYTHING(s_suppkey), 'avg_p_retailprice': AVG(p_retailprice)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey, 's_suppkey': t0.s_suppkey}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 7ee7d9d0f..82b371c39 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,14 +1,14 @@ -ROOT(columns=[('n', ndistinct_s_suppkey)], orderings=[]) - AGGREGATE(keys={}, aggregations={'ndistinct_s_suppkey': NDISTINCT(s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.avg_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - 
SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) +ROOT(columns=[('n', ndistinct_anything_s_suppkey)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_anything_s_suppkey': NDISTINCT(anything_s_suppkey)}) + JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey}) + JOIN(condition=t0.anything_s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'anything_s_suppkey': t0.anything_s_suppkey, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 'supplier_avg_price': t0.supplier_avg_price}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'anything_s_suppkey': ANYTHING(s_suppkey), 'supplier_avg_price': AVG(p_retailprice)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + 
SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_6.txt b/tests/test_plan_refsols/correl_6.txt index 3b711b4d7..f3f3f98dc 100644 --- a/tests/test_plan_refsols/correl_6.txt +++ b/tests/test_plan_refsols/correl_6.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', r_name), ('n_prefix_nations', n_rows)], orderings=[]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('name', anything_r_name), ('n_prefix_nations', n_rows)], orderings=[]) + AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'anything_r_name': ANYTHING(r_name), 'n_rows': COUNT()}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git 
a/tests/test_plan_refsols/cryptbank_analysis_04_raw.txt b/tests/test_plan_refsols/cryptbank_analysis_04_raw.txt index 9f7bfc6df..9f348a995 100644 --- a/tests/test_plan_refsols/cryptbank_analysis_04_raw.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_04_raw.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)), ('cust_name', JOIN_STRINGS(' ':string, UNMASK::(LOWER([c_fname])), UNMASK::(LOWER([c_lname])))), ('n_trans', n_rows)], orderings=[(UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)):asc_first]) - JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 'n_rows': t1.n_rows}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - FILTER(condition=MONOTONIC(1980:numeric, YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 1985:numeric), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday, 'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('key', UNMASK::(CASE WHEN [anything_a_key] = 0 THEN 0 ELSE (CASE WHEN [anything_a_key] > 0 THEN 1 
ELSE -1 END) * CAST(SUBSTRING([anything_a_key], 1 + INSTR([anything_a_key], '-'), LENGTH([anything_a_key]) / 2) AS INTEGER) END)), ('cust_name', JOIN_STRINGS(' ':string, UNMASK::(LOWER([anything_c_fname])), UNMASK::(LOWER([anything_c_lname])))), ('n_trans', n_rows)], orderings=[(UNMASK::(CASE WHEN [anything_a_key] = 0 THEN 0 ELSE (CASE WHEN [anything_a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([anything_a_key], 1 + INSTR([anything_a_key], '-'), LENGTH([anything_a_key]) / 2) AS INTEGER) END)):asc_first]) + AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'anything_a_key': ANYTHING(a_key), 'anything_c_fname': ANYTHING(c_fname), 'anything_c_lname': ANYTHING(c_lname), 'n_rows': COUNT()}) + JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 't_sourceaccount': t1.t_sourceaccount}) + JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) + FILTER(condition=MONOTONIC(1980:numeric, YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 1985:numeric), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday, 'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) FILTER(condition=UNMASK::((1025.67 - ([t_amount]))) > 9000.0:numeric, columns={'t_sourceaccount': t_sourceaccount}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_sourceaccount': t_sourceaccount}) diff --git 
a/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt b/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt index 9f7bfc6df..9f348a995 100644 --- a/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)), ('cust_name', JOIN_STRINGS(' ':string, UNMASK::(LOWER([c_fname])), UNMASK::(LOWER([c_lname])))), ('n_trans', n_rows)], orderings=[(UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)):asc_first]) - JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 'n_rows': t1.n_rows}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - FILTER(condition=MONOTONIC(1980:numeric, YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 1985:numeric), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday, 'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('key', UNMASK::(CASE WHEN [anything_a_key] = 0 THEN 0 ELSE (CASE WHEN 
[anything_a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([anything_a_key], 1 + INSTR([anything_a_key], '-'), LENGTH([anything_a_key]) / 2) AS INTEGER) END)), ('cust_name', JOIN_STRINGS(' ':string, UNMASK::(LOWER([anything_c_fname])), UNMASK::(LOWER([anything_c_lname])))), ('n_trans', n_rows)], orderings=[(UNMASK::(CASE WHEN [anything_a_key] = 0 THEN 0 ELSE (CASE WHEN [anything_a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([anything_a_key], 1 + INSTR([anything_a_key], '-'), LENGTH([anything_a_key]) / 2) AS INTEGER) END)):asc_first]) + AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'anything_a_key': ANYTHING(a_key), 'anything_c_fname': ANYTHING(c_fname), 'anything_c_lname': ANYTHING(c_lname), 'n_rows': COUNT()}) + JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 't_sourceaccount': t1.t_sourceaccount}) + JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) + FILTER(condition=MONOTONIC(1980:numeric, YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 1985:numeric), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday, 'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) FILTER(condition=UNMASK::((1025.67 - ([t_amount]))) > 9000.0:numeric, columns={'t_sourceaccount': t_sourceaccount}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_sourceaccount': t_sourceaccount}) diff --git 
a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 557fe4d71..e30507b7e 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('name', c_name), ('largest_diff', IFF(ABS(min_revenue_delta) > max_revenue_delta, min_revenue_delta, max_revenue_delta))], orderings=[(IFF(ABS(min_revenue_delta) > max_revenue_delta, min_revenue_delta, max_revenue_delta)):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_revenue_delta': t1.max_revenue_delta, 'min_revenue_delta': t1.min_revenue_delta}) - FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_revenue_delta': MAX(revenue_delta), 'min_revenue_delta': MIN(revenue_delta)}) +ROOT(columns=[('name', anything_c_name), ('largest_diff', IFF(ABS(min_revenue_delta) > max_revenue_delta, min_revenue_delta, max_revenue_delta))], orderings=[(IFF(ABS(min_revenue_delta) > max_revenue_delta, min_revenue_delta, max_revenue_delta)):desc_last], limit=5:numeric) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'max_revenue_delta': MAX(revenue_delta), 'min_revenue_delta': MIN(revenue_delta)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'revenue_delta': t1.revenue_delta}) + FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 900f198ff..44171bcf6 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,11 +1,11 @@ ROOT(columns=[('year', year_o_orderdate), ('month', month_o_orderdate)], orderings=[(year_o_orderdate):asc_first, (month_o_orderdate):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_o_orderdate):asc_last, (month_o_orderdate):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_o_orderdate):asc_last, (month_o_orderdate):asc_last], default=0.0), columns={'month_o_orderdate': month_o_orderdate, 'year_o_orderdate': year_o_orderdate}) - JOIN(condition=t0.year_o_orderdate == t1.year_o_orderdate, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month_o_orderdate': t1.month_o_orderdate, 'sum_o_totalprice': t1.sum_o_totalprice, 'year_o_orderdate': t1.year_o_orderdate}) - FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > next_year_total_spent, columns={'year_o_orderdate': year_o_orderdate}) - 
PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_o_orderdate):asc_last], default=0.0), 'sum_o_totalprice': sum_o_totalprice, 'year_o_orderdate': year_o_orderdate}) - AGGREGATE(keys={'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month_o_orderdate': MONTH(o_orderdate), 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + AGGREGATE(keys={'month_o_orderdate': MONTH(o_orderdate), 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + JOIN(condition=t0.year_o_orderdate == t1.year_o_orderdate, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > next_year_total_spent, columns={'year_o_orderdate': year_o_orderdate}) + PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_o_orderdate):asc_last], default=0.0), 'sum_o_totalprice': sum_o_totalprice, 'year_o_orderdate': year_o_orderdate}) + AGGREGATE(keys={'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) 
SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/nation_acctbal_breakdown.txt b/tests/test_plan_refsols/nation_acctbal_breakdown.txt index 998776cc3..3ea59b65a 100644 --- a/tests/test_plan_refsols/nation_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/nation_acctbal_breakdown.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', n_name), ('n_red_acctbal', count_negative_acctbal), ('n_black_acctbal', count_non_negative_acctbal), ('median_red_acctbal', median_negative_acctbal), ('median_black_acctbal', median_non_negative_acctbal), ('median_overall_acctbal', median_c_acctbal)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'count_negative_acctbal': t1.count_negative_acctbal, 'count_non_negative_acctbal': t1.count_non_negative_acctbal, 'median_c_acctbal': t1.median_c_acctbal, 'median_negative_acctbal': t1.median_negative_acctbal, 'median_non_negative_acctbal': t1.median_non_negative_acctbal, 'n_name': t0.n_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_negative_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'count_non_negative_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_c_acctbal': MEDIAN(c_acctbal), 'median_negative_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'median_non_negative_acctbal': 
MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric))}) +ROOT(columns=[('nation_name', anything_n_name), ('n_red_acctbal', count_negative_acctbal), ('n_black_acctbal', count_non_negative_acctbal), ('median_red_acctbal', median_negative_acctbal), ('median_black_acctbal', median_non_negative_acctbal), ('median_overall_acctbal', median_c_acctbal)], orderings=[(anything_n_name):asc_first]) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_negative_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'count_non_negative_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_c_acctbal': MEDIAN(c_acctbal), 'median_negative_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'median_non_negative_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric))}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 16762bac8..488f4fd87 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,15 +1,15 @@ -ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 
0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) +ROOT(columns=[('name', anything_anything_p_name), ('qty_95', DEFAULT_TO(anything_sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(anything_sum_l_quantity, 0:numeric)):desc_last, (anything_anything_p_name):asc_first], limit=3:numeric) + 
AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'agg_1': SUM(l_quantity), 'anything_anything_p_name': ANYTHING(anything_p_name), 'anything_sum_l_quantity': ANYTHING(sum_l_quantity)}) + JOIN(condition=t0.anything_p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t0.anything_p_name, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'sum_l_quantity': t0.sum_l_quantity}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_p_partkey': ANYTHING(p_partkey), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': 
l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) diff --git a/tests/test_plan_refsols/simple_var_std.txt b/tests/test_plan_refsols/simple_var_std.txt index 56d9687d4..89f1ca65b 100644 --- a/tests/test_plan_refsols/simple_var_std.txt +++ b/tests/test_plan_refsols/simple_var_std.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('name', n_name), ('var', population_var_s_acctbal), ('std', population_std_s_acctbal), ('sample_var', sample_var_s_acctbal), ('sample_std', sample_std_s_acctbal), ('pop_var', population_var_s_acctbal), ('pop_std', population_std_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'population_std_s_acctbal': t1.population_std_s_acctbal, 'population_var_s_acctbal': t1.population_var_s_acctbal, 'sample_std_s_acctbal': t1.sample_std_s_acctbal, 'sample_var_s_acctbal': t1.sample_var_s_acctbal}) - FILTER(condition=ISIN(n_name, ['ALGERIA', 'ARGENTINA']:array[unknown]), columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'population_std_s_acctbal': POPULATION_STD(s_acctbal), 'population_var_s_acctbal': POPULATION_VAR(s_acctbal), 'sample_std_s_acctbal': SAMPLE_STD(s_acctbal), 'sample_var_s_acctbal': SAMPLE_VAR(s_acctbal)}) +ROOT(columns=[('name', anything_n_name), ('var', population_var_s_acctbal), ('std', population_std_s_acctbal), ('sample_var', sample_var_s_acctbal), ('sample_std', sample_std_s_acctbal), ('pop_var', population_var_s_acctbal), ('pop_std', population_std_s_acctbal)], orderings=[]) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'population_std_s_acctbal': 
POPULATION_STD(s_acctbal), 'population_var_s_acctbal': POPULATION_VAR(s_acctbal), 'sample_std_s_acctbal': SAMPLE_STD(s_acctbal), 'sample_var_s_acctbal': SAMPLE_VAR(s_acctbal)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_acctbal': t1.s_acctbal, 's_nationkey': t1.s_nationkey}) + FILTER(condition=ISIN(n_name, ['ALGERIA', 'ARGENTINA']:array[unknown]), columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 012ae3c85..5d72b9bb2 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -1,15 +1,15 @@ -ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name):asc_first], limit=10:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) - JOIN(condition=t0.ps_partkey == t1.p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, 
columns={'ps_suppkey': t0.ps_suppkey}) +ROOT(columns=[('S_NAME', anything_s_name), ('S_ADDRESS', anything_s_address)], orderings=[(anything_s_name):asc_first], limit=10:numeric) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_s_address': ANYTHING(s_address), 'anything_s_name': ANYTHING(s_name)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t1.ps_suppkey, 's_address': t0.s_address, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.ps_partkey == t1.anything_p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_partkey': ANYTHING(p_partkey), 'sum_l_quantity': SUM(l_quantity)}) + 
JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_partkey': t0.p_partkey}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/window_filter_order_4.txt b/tests/test_plan_refsols/window_filter_order_4.txt index 06f4e0403..c8c5f1d0e 100644 --- a/tests/test_plan_refsols/window_filter_order_4.txt +++ b/tests/test_plan_refsols/window_filter_order_4.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey}) + JOIN(condition=t0.c_nationkey == 
t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_sql_refsols/correl_13_sqlite.sql b/tests/test_sql_refsols/correl_13_sqlite.sql index c96106b05..defc86a28 100644 --- a/tests/test_sql_refsols/correl_13_sqlite.sql +++ b/tests/test_sql_refsols/correl_13_sqlite.sql @@ -1,18 +1,18 @@ -WITH _s3 AS ( +WITH _t0 AS ( SELECT DISTINCT partsupp.ps_suppkey - FROM tpch.partsupp AS partsupp + FROM tpch.supplier AS supplier + JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey JOIN tpch.part AS part ON part.p_container LIKE 'SM%' AND part.p_partkey = partsupp.ps_partkey AND part.p_retailprice < ( partsupp.ps_supplycost * 1.5 ) + WHERE + supplier.s_nationkey <= 3 ) SELECT COUNT(*) AS n -FROM tpch.supplier AS supplier -JOIN _s3 AS _s3 - ON _s3.ps_suppkey = supplier.s_suppkey -WHERE - supplier.s_nationkey <= 3 +FROM _t0 diff --git a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index fdf2e036d..cea605abf 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,26 +1,26 @@ -WITH _s3 AS ( +WITH _s4 AS ( SELECT - partsupp.ps_suppkey, + MAX(supplier.s_suppkey) AS anything_s_suppkey, AVG(part.p_retailprice) AS avg_p_retailprice - FROM tpch.partsupp AS partsupp + FROM tpch.supplier AS supplier + JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey JOIN tpch.part AS part ON part.p_partkey = partsupp.ps_partkey + WHERE + 
supplier.s_acctbal < 1000 AND supplier.s_nationkey = 19 GROUP BY - 1 + partsupp.ps_suppkey ) SELECT - COUNT(DISTINCT supplier.s_suppkey) AS n -FROM tpch.supplier AS supplier -JOIN _s3 AS _s3 - ON _s3.ps_suppkey = supplier.s_suppkey + COUNT(DISTINCT _s4.anything_s_suppkey) AS n +FROM _s4 AS _s4 JOIN tpch.partsupp AS partsupp - ON partsupp.ps_suppkey = supplier.s_suppkey + ON _s4.anything_s_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part - ON _s3.avg_p_retailprice > part.p_retailprice + ON _s4.avg_p_retailprice > part.p_retailprice AND part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey AND part.p_retailprice < ( partsupp.ps_supplycost * 1.5 ) -WHERE - supplier.s_acctbal < 1000 AND supplier.s_nationkey = 19 diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index 65487aef4..10c89c472 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -2,31 +2,32 @@ WITH _s0 AS ( SELECT AVG(p_retailprice) AS avg_p_retailprice FROM tpch.part -), _s5 AS ( +), _s6 AS ( SELECT - partsupp.ps_suppkey, - AVG(part.p_retailprice) AS avg_p_retailprice - FROM tpch.partsupp AS partsupp + MAX(_s0.avg_p_retailprice) AS anything_avg_p_retailprice, + MAX(supplier.s_suppkey) AS anything_s_suppkey, + AVG(part.p_retailprice) AS supplier_avg_price + FROM _s0 AS _s0 + JOIN tpch.supplier AS supplier + ON supplier.s_acctbal < 1000 AND supplier.s_nationkey = 19 + JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey JOIN tpch.part AS part ON part.p_partkey = partsupp.ps_partkey GROUP BY - 1 + partsupp.ps_suppkey ) SELECT - COUNT(DISTINCT supplier.s_suppkey) AS n -FROM _s0 AS _s0 -JOIN tpch.supplier AS supplier - ON supplier.s_acctbal < 1000 AND supplier.s_nationkey = 19 -JOIN _s5 AS _s5 - ON _s5.ps_suppkey = supplier.s_suppkey + COUNT(DISTINCT _s6.anything_s_suppkey) AS n +FROM _s6 AS _s6 JOIN tpch.partsupp AS partsupp - ON partsupp.ps_suppkey = 
supplier.s_suppkey + ON _s6.anything_s_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part - ON _s5.avg_p_retailprice > part.p_retailprice + ON _s6.supplier_avg_price > part.p_retailprice AND part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey AND part.p_retailprice < ( - _s0.avg_p_retailprice * 0.85 + _s6.anything_avg_p_retailprice * 0.85 ) AND part.p_retailprice < ( partsupp.ps_supplycost * 1.5 diff --git a/tests/test_sql_refsols/cryptbank_analysis_04_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_analysis_04_raw_sqlite.sql index b21c053ef..82c586b20 100644 --- a/tests/test_sql_refsols/cryptbank_analysis_04_raw_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_analysis_04_raw_sqlite.sql @@ -1,27 +1,15 @@ -WITH _s3 AS ( - SELECT - t_sourceaccount, - COUNT(*) AS n_rows - FROM crbnk.transactions - WHERE - ( - 1025.67 - t_amount - ) > 9000.0 - GROUP BY - 1 -) SELECT CASE - WHEN accounts.a_key = 0 + WHEN MAX(accounts.a_key) = 0 THEN 0 - ELSE CASE WHEN accounts.a_key > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING( - accounts.a_key, - 1 + INSTR(accounts.a_key, '-'), - CAST(LENGTH(accounts.a_key) AS REAL) / 2 + ELSE CASE WHEN MAX(accounts.a_key) > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING( + MAX(accounts.a_key), + 1 + INSTR(MAX(accounts.a_key), '-'), + CAST(LENGTH(MAX(accounts.a_key)) AS REAL) / 2 ) AS INTEGER) END AS key, - CONCAT_WS(' ', LOWER(customers.c_fname), LOWER(customers.c_lname)) AS cust_name, - _s3.n_rows AS n_trans + CONCAT_WS(' ', LOWER(MAX(customers.c_fname)), LOWER(MAX(customers.c_lname))) AS cust_name, + COUNT(*) AS n_trans FROM crbnk.accounts AS accounts JOIN crbnk.customers AS customers ON CAST(STRFTIME('%Y', DATE(customers.c_birthday, '+472 days')) AS INTEGER) <= 1985 @@ -29,8 +17,11 @@ JOIN crbnk.customers AS customers AND accounts.a_custkey = ( 42 - customers.c_key ) -JOIN _s3 AS _s3 - ON _s3.t_sourceaccount = CASE +JOIN crbnk.transactions AS transactions + ON ( + 1025.67 - transactions.t_amount + ) > 9000.0 + AND 
transactions.t_sourceaccount = CASE WHEN accounts.a_key = 0 THEN 0 ELSE CASE WHEN accounts.a_key > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING( @@ -39,5 +30,7 @@ JOIN _s3 AS _s3 CAST(LENGTH(accounts.a_key) AS REAL) / 2 ) AS INTEGER) END +GROUP BY + transactions.t_sourceaccount ORDER BY 1 diff --git a/tests/test_sql_refsols/cryptbank_analysis_04_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_analysis_04_rewrite_sqlite.sql index b21c053ef..82c586b20 100644 --- a/tests/test_sql_refsols/cryptbank_analysis_04_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_analysis_04_rewrite_sqlite.sql @@ -1,27 +1,15 @@ -WITH _s3 AS ( - SELECT - t_sourceaccount, - COUNT(*) AS n_rows - FROM crbnk.transactions - WHERE - ( - 1025.67 - t_amount - ) > 9000.0 - GROUP BY - 1 -) SELECT CASE - WHEN accounts.a_key = 0 + WHEN MAX(accounts.a_key) = 0 THEN 0 - ELSE CASE WHEN accounts.a_key > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING( - accounts.a_key, - 1 + INSTR(accounts.a_key, '-'), - CAST(LENGTH(accounts.a_key) AS REAL) / 2 + ELSE CASE WHEN MAX(accounts.a_key) > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING( + MAX(accounts.a_key), + 1 + INSTR(MAX(accounts.a_key), '-'), + CAST(LENGTH(MAX(accounts.a_key)) AS REAL) / 2 ) AS INTEGER) END AS key, - CONCAT_WS(' ', LOWER(customers.c_fname), LOWER(customers.c_lname)) AS cust_name, - _s3.n_rows AS n_trans + CONCAT_WS(' ', LOWER(MAX(customers.c_fname)), LOWER(MAX(customers.c_lname))) AS cust_name, + COUNT(*) AS n_trans FROM crbnk.accounts AS accounts JOIN crbnk.customers AS customers ON CAST(STRFTIME('%Y', DATE(customers.c_birthday, '+472 days')) AS INTEGER) <= 1985 @@ -29,8 +17,11 @@ JOIN crbnk.customers AS customers AND accounts.a_custkey = ( 42 - customers.c_key ) -JOIN _s3 AS _s3 - ON _s3.t_sourceaccount = CASE +JOIN crbnk.transactions AS transactions + ON ( + 1025.67 - transactions.t_amount + ) > 9000.0 + AND transactions.t_sourceaccount = CASE WHEN accounts.a_key = 0 THEN 0 ELSE CASE WHEN accounts.a_key > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING( @@ 
-39,5 +30,7 @@ JOIN _s3 AS _s3 CAST(LENGTH(accounts.a_key) AS REAL) / 2 ) AS INTEGER) END +GROUP BY + transactions.t_sourceaccount ORDER BY 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql index 3f7b5e66c..5e419b3c9 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql @@ -1,16 +1,9 @@ -WITH _s1 AS ( - SELECT - sender_id, - COUNT(*) AS n_rows - FROM main.wallet_transactions_daily - WHERE - sender_type = 0 - GROUP BY - 1 -) SELECT - users.uid AS user_id, - _s1.n_rows AS total_transactions + ANY_VALUE(users.uid) AS user_id, + COUNT(*) AS total_transactions FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.sender_id = users.uid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON users.uid = wallet_transactions_daily.sender_id + AND wallet_transactions_daily.sender_type = 0 +GROUP BY + wallet_transactions_daily.sender_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql index 3f7b5e66c..5e419b3c9 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql @@ -1,16 +1,9 @@ -WITH _s1 AS ( - SELECT - sender_id, - COUNT(*) AS n_rows - FROM main.wallet_transactions_daily - WHERE - sender_type = 0 - GROUP BY - 1 -) SELECT - users.uid AS user_id, - _s1.n_rows AS total_transactions + ANY_VALUE(users.uid) AS user_id, + COUNT(*) AS total_transactions FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.sender_id = users.uid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON users.uid = wallet_transactions_daily.sender_id + AND wallet_transactions_daily.sender_type = 0 +GROUP BY + wallet_transactions_daily.sender_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql index 3f7b5e66c..b12f30f70 
100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql @@ -1,16 +1,9 @@ -WITH _s1 AS ( - SELECT - sender_id, - COUNT(*) AS n_rows - FROM main.wallet_transactions_daily - WHERE - sender_type = 0 - GROUP BY - 1 -) SELECT - users.uid AS user_id, - _s1.n_rows AS total_transactions + MAX(users.uid) AS user_id, + COUNT(*) AS total_transactions FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.sender_id = users.uid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON users.uid = wallet_transactions_daily.sender_id + AND wallet_transactions_daily.sender_type = 0 +GROUP BY + wallet_transactions_daily.sender_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql index 3f7b5e66c..5e419b3c9 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql @@ -1,16 +1,9 @@ -WITH _s1 AS ( - SELECT - sender_id, - COUNT(*) AS n_rows - FROM main.wallet_transactions_daily - WHERE - sender_type = 0 - GROUP BY - 1 -) SELECT - users.uid AS user_id, - _s1.n_rows AS total_transactions + ANY_VALUE(users.uid) AS user_id, + COUNT(*) AS total_transactions FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.sender_id = users.uid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON users.uid = wallet_transactions_daily.sender_id + AND wallet_transactions_daily.sender_type = 0 +GROUP BY + wallet_transactions_daily.sender_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql index 3f7b5e66c..b12f30f70 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql @@ -1,16 +1,9 @@ -WITH _s1 AS ( - SELECT - sender_id, - COUNT(*) AS n_rows - FROM main.wallet_transactions_daily - WHERE - sender_type = 0 - GROUP BY 
- 1 -) SELECT - users.uid AS user_id, - _s1.n_rows AS total_transactions + MAX(users.uid) AS user_id, + COUNT(*) AS total_transactions FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.sender_id = users.uid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON users.uid = wallet_transactions_daily.sender_id + AND wallet_transactions_daily.sender_type = 0 +GROUP BY + wallet_transactions_daily.sender_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql index 3f2270a77..54888423e 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql @@ -1,20 +1,18 @@ -WITH _s1 AS ( - SELECT - user_id, - SUM( - DATEDIFF(CAST(session_end_ts AS DATETIME), CAST(session_start_ts AS DATETIME), SECOND) - ) AS sum_duration - FROM main.user_sessions - WHERE - session_end_ts < '2023-06-08' AND session_start_ts >= '2023-06-01' - GROUP BY - 1 -) SELECT - users.uid, - _s1.sum_duration AS total_duration + ANY_VALUE(users.uid) AS uid, + SUM( + DATEDIFF( + CAST(user_sessions.session_end_ts AS DATETIME), + CAST(user_sessions.session_start_ts AS DATETIME), + SECOND + ) + ) AS total_duration FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.user_id = users.uid +JOIN main.user_sessions AS user_sessions + ON user_sessions.session_end_ts < '2023-06-08' + AND user_sessions.session_start_ts >= '2023-06-01' + AND user_sessions.user_id = users.uid +GROUP BY + user_sessions.user_id ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql index 8c68cd3c4..beecc5327 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql @@ -1,18 +1,14 @@ -WITH _s1 AS ( - SELECT - user_id, - SUM(TIMESTAMPDIFF(SECOND, session_start_ts, session_end_ts)) AS sum_duration - FROM main.user_sessions - WHERE - session_end_ts < 
'2023-06-08' AND session_start_ts >= '2023-06-01' - GROUP BY - 1 -) SELECT - users.uid, - _s1.sum_duration AS total_duration + ANY_VALUE(users.uid) AS uid, + SUM( + TIMESTAMPDIFF(SECOND, user_sessions.session_start_ts, user_sessions.session_end_ts) + ) AS total_duration FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.user_id = users.uid +JOIN main.user_sessions AS user_sessions + ON user_sessions.session_end_ts < '2023-06-08' + AND user_sessions.session_start_ts >= '2023-06-01' + AND user_sessions.user_id = users.uid +GROUP BY + user_sessions.user_id ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql index 53fd12a9c..03f38d12f 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql @@ -1,22 +1,16 @@ -WITH _s1 AS ( - SELECT - user_id, - SUM( - EXTRACT(EPOCH FROM ( - CAST(session_end_ts AS TIMESTAMP) - CAST(session_start_ts AS TIMESTAMP) - )) - ) AS sum_duration - FROM main.user_sessions - WHERE - session_end_ts < '2023-06-08' AND session_start_ts >= '2023-06-01' - GROUP BY - 1 -) SELECT - users.uid, - _s1.sum_duration AS total_duration + MAX(users.uid) AS uid, + SUM( + EXTRACT(EPOCH FROM ( + CAST(user_sessions.session_end_ts AS TIMESTAMP) - CAST(user_sessions.session_start_ts AS TIMESTAMP) + )) + ) AS total_duration FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.user_id = users.uid +JOIN main.user_sessions AS user_sessions + ON user_sessions.session_end_ts < '2023-06-08' + AND user_sessions.session_start_ts >= '2023-06-01' + AND user_sessions.user_id = users.uid +GROUP BY + user_sessions.user_id ORDER BY 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql index 75d2470d8..ad121c7f4 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql +++ 
b/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql @@ -1,20 +1,18 @@ -WITH _s1 AS ( - SELECT - user_id, - SUM( - DATEDIFF(SECOND, CAST(session_start_ts AS DATETIME), CAST(session_end_ts AS DATETIME)) - ) AS sum_duration - FROM main.user_sessions - WHERE - session_end_ts < '2023-06-08' AND session_start_ts >= '2023-06-01' - GROUP BY - 1 -) SELECT - users.uid, - _s1.sum_duration AS total_duration + ANY_VALUE(users.uid) AS uid, + SUM( + DATEDIFF( + SECOND, + CAST(user_sessions.session_start_ts AS DATETIME), + CAST(user_sessions.session_end_ts AS DATETIME) + ) + ) AS total_duration FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.user_id = users.uid +JOIN main.user_sessions AS user_sessions + ON user_sessions.session_end_ts < '2023-06-08' + AND user_sessions.session_start_ts >= '2023-06-01' + AND user_sessions.user_id = users.uid +GROUP BY + user_sessions.user_id ORDER BY 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql index ad97e11a0..f26571a07 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql @@ -1,26 +1,20 @@ -WITH _s1 AS ( - SELECT - user_id, - SUM( - ( - ( - CAST(( - JULIANDAY(DATE(session_end_ts, 'start of day')) - JULIANDAY(DATE(session_start_ts, 'start of day')) - ) AS INTEGER) * 24 + CAST(STRFTIME('%H', session_end_ts) AS INTEGER) - CAST(STRFTIME('%H', session_start_ts) AS INTEGER) - ) * 60 + CAST(STRFTIME('%M', session_end_ts) AS INTEGER) - CAST(STRFTIME('%M', session_start_ts) AS INTEGER) - ) * 60 + CAST(STRFTIME('%S', session_end_ts) AS INTEGER) - CAST(STRFTIME('%S', session_start_ts) AS INTEGER) - ) AS sum_duration - FROM main.user_sessions - WHERE - session_end_ts < '2023-06-08' AND session_start_ts >= '2023-06-01' - GROUP BY - 1 -) SELECT - users.uid, - _s1.sum_duration AS total_duration + MAX(users.uid) AS uid, + SUM( + ( + ( + CAST(( + 
JULIANDAY(DATE(user_sessions.session_end_ts, 'start of day')) - JULIANDAY(DATE(user_sessions.session_start_ts, 'start of day')) + ) AS INTEGER) * 24 + CAST(STRFTIME('%H', user_sessions.session_end_ts) AS INTEGER) - CAST(STRFTIME('%H', user_sessions.session_start_ts) AS INTEGER) + ) * 60 + CAST(STRFTIME('%M', user_sessions.session_end_ts) AS INTEGER) - CAST(STRFTIME('%M', user_sessions.session_start_ts) AS INTEGER) + ) * 60 + CAST(STRFTIME('%S', user_sessions.session_end_ts) AS INTEGER) - CAST(STRFTIME('%S', user_sessions.session_start_ts) AS INTEGER) + ) AS total_duration FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.user_id = users.uid +JOIN main.user_sessions AS user_sessions + ON user_sessions.session_end_ts < '2023-06-08' + AND user_sessions.session_start_ts >= '2023-06-01' + AND user_sessions.user_id = users.uid +GROUP BY + user_sessions.user_id ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql index 66a103b7d..6ec5b8cb9 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql @@ -1,18 +1,12 @@ -WITH _s1 AS ( - SELECT - user_id, - COUNT(*) AS n_rows - FROM main.notifications - WHERE - status = 'unread' AND type = 'promotion' - GROUP BY - 1 -) SELECT - users.username, - _s1.n_rows AS total_unread_notifs + ANY_VALUE(users.username) AS username, + COUNT(*) AS total_unread_notifs FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.user_id = users.uid +JOIN main.notifications AS notifications + ON notifications.status = 'unread' + AND notifications.type = 'promotion' + AND notifications.user_id = users.uid WHERE LOWER(users.country) = 'us' +GROUP BY + notifications.user_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv16_mysql.sql index 66a103b7d..6ec5b8cb9 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv16_mysql.sql +++ 
b/tests/test_sql_refsols/defog_ewallet_adv16_mysql.sql @@ -1,18 +1,12 @@ -WITH _s1 AS ( - SELECT - user_id, - COUNT(*) AS n_rows - FROM main.notifications - WHERE - status = 'unread' AND type = 'promotion' - GROUP BY - 1 -) SELECT - users.username, - _s1.n_rows AS total_unread_notifs + ANY_VALUE(users.username) AS username, + COUNT(*) AS total_unread_notifs FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.user_id = users.uid +JOIN main.notifications AS notifications + ON notifications.status = 'unread' + AND notifications.type = 'promotion' + AND notifications.user_id = users.uid WHERE LOWER(users.country) = 'us' +GROUP BY + notifications.user_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv16_postgres.sql index 66a103b7d..8ea047239 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv16_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv16_postgres.sql @@ -1,18 +1,12 @@ -WITH _s1 AS ( - SELECT - user_id, - COUNT(*) AS n_rows - FROM main.notifications - WHERE - status = 'unread' AND type = 'promotion' - GROUP BY - 1 -) SELECT - users.username, - _s1.n_rows AS total_unread_notifs + MAX(users.username) AS username, + COUNT(*) AS total_unread_notifs FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.user_id = users.uid +JOIN main.notifications AS notifications + ON notifications.status = 'unread' + AND notifications.type = 'promotion' + AND notifications.user_id = users.uid WHERE LOWER(users.country) = 'us' +GROUP BY + notifications.user_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv16_snowflake.sql index 66a103b7d..6ec5b8cb9 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv16_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv16_snowflake.sql @@ -1,18 +1,12 @@ -WITH _s1 AS ( - SELECT - user_id, - COUNT(*) AS n_rows - FROM main.notifications - WHERE - status = 'unread' AND type = 'promotion' - GROUP BY - 1 -) 
SELECT - users.username, - _s1.n_rows AS total_unread_notifs + ANY_VALUE(users.username) AS username, + COUNT(*) AS total_unread_notifs FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.user_id = users.uid +JOIN main.notifications AS notifications + ON notifications.status = 'unread' + AND notifications.type = 'promotion' + AND notifications.user_id = users.uid WHERE LOWER(users.country) = 'us' +GROUP BY + notifications.user_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql index 66a103b7d..8ea047239 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql @@ -1,18 +1,12 @@ -WITH _s1 AS ( - SELECT - user_id, - COUNT(*) AS n_rows - FROM main.notifications - WHERE - status = 'unread' AND type = 'promotion' - GROUP BY - 1 -) SELECT - users.username, - _s1.n_rows AS total_unread_notifs + MAX(users.username) AS username, + COUNT(*) AS total_unread_notifs FROM main.users AS users -JOIN _s1 AS _s1 - ON _s1.user_id = users.uid +JOIN main.notifications AS notifications + ON notifications.status = 'unread' + AND notifications.type = 'promotion' + AND notifications.user_id = users.uid WHERE LOWER(users.country) = 'us' +GROUP BY + notifications.user_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql index 903ab4f68..23e0a0267 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql @@ -1,17 +1,9 @@ -WITH _s1 AS ( - SELECT - receiver_id, - COUNT(DISTINCT coupon_id) AS ndistinct_coupon_id, - COUNT(DISTINCT txid) AS ndistinct_txid - FROM main.wallet_transactions_daily - WHERE - status = 'success' - GROUP BY - 1 -) SELECT - merchants.name, - _s1.ndistinct_coupon_id / _s1.ndistinct_txid AS CPUR + ANY_VALUE(merchants.name) AS name, + COUNT(DISTINCT wallet_transactions_daily.coupon_id) / COUNT(DISTINCT 
wallet_transactions_daily.txid) AS CPUR FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.status = 'success' +GROUP BY + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv1_mysql.sql index 903ab4f68..23e0a0267 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv1_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv1_mysql.sql @@ -1,17 +1,9 @@ -WITH _s1 AS ( - SELECT - receiver_id, - COUNT(DISTINCT coupon_id) AS ndistinct_coupon_id, - COUNT(DISTINCT txid) AS ndistinct_txid - FROM main.wallet_transactions_daily - WHERE - status = 'success' - GROUP BY - 1 -) SELECT - merchants.name, - _s1.ndistinct_coupon_id / _s1.ndistinct_txid AS CPUR + ANY_VALUE(merchants.name) AS name, + COUNT(DISTINCT wallet_transactions_daily.coupon_id) / COUNT(DISTINCT wallet_transactions_daily.txid) AS CPUR FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.status = 'success' +GROUP BY + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv1_postgres.sql index a6d74f708..7cd6edb41 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv1_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv1_postgres.sql @@ -1,17 +1,9 @@ -WITH _s1 AS ( - SELECT - receiver_id, - COUNT(DISTINCT coupon_id) AS ndistinct_coupon_id, - COUNT(DISTINCT txid) AS ndistinct_txid - FROM main.wallet_transactions_daily - WHERE - status = 'success' - GROUP BY - 1 -) SELECT - merchants.name, - CAST(_s1.ndistinct_coupon_id AS DOUBLE PRECISION) / 
_s1.ndistinct_txid AS CPUR + MAX(merchants.name) AS name, + CAST(COUNT(DISTINCT wallet_transactions_daily.coupon_id) AS DOUBLE PRECISION) / COUNT(DISTINCT wallet_transactions_daily.txid) AS CPUR FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.status = 'success' +GROUP BY + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv1_snowflake.sql index 903ab4f68..23e0a0267 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv1_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv1_snowflake.sql @@ -1,17 +1,9 @@ -WITH _s1 AS ( - SELECT - receiver_id, - COUNT(DISTINCT coupon_id) AS ndistinct_coupon_id, - COUNT(DISTINCT txid) AS ndistinct_txid - FROM main.wallet_transactions_daily - WHERE - status = 'success' - GROUP BY - 1 -) SELECT - merchants.name, - _s1.ndistinct_coupon_id / _s1.ndistinct_txid AS CPUR + ANY_VALUE(merchants.name) AS name, + COUNT(DISTINCT wallet_transactions_daily.coupon_id) / COUNT(DISTINCT wallet_transactions_daily.txid) AS CPUR FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.status = 'success' +GROUP BY + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql index 765240a19..8c171e4f1 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql @@ -1,17 +1,9 @@ -WITH _s1 AS ( - SELECT - receiver_id, - COUNT(DISTINCT coupon_id) AS ndistinct_coupon_id, - COUNT(DISTINCT txid) AS ndistinct_txid - FROM 
main.wallet_transactions_daily - WHERE - status = 'success' - GROUP BY - 1 -) SELECT - merchants.name, - CAST(_s1.ndistinct_coupon_id AS REAL) / _s1.ndistinct_txid AS CPUR + MAX(merchants.name) AS name, + CAST(COUNT(DISTINCT wallet_transactions_daily.coupon_id) AS REAL) / COUNT(DISTINCT wallet_transactions_daily.txid) AS CPUR FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.status = 'success' +GROUP BY + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql index 9f6e854c1..e7cd13330 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql @@ -1,16 +1,10 @@ -WITH _s1 AS ( - SELECT - merchant_id, - COUNT(*) AS n_rows - FROM main.coupons - GROUP BY - 1 -) SELECT - merchants.name AS merchant_name, - _s1.n_rows AS total_coupons + ANY_VALUE(merchants.name) AS merchant_name, + COUNT(*) AS total_coupons FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.merchant_id = merchants.mid +JOIN main.coupons AS coupons + ON coupons.merchant_id = merchants.mid WHERE LOWER(merchants.category) LIKE '%retail%' AND merchants.status = 'active' +GROUP BY + coupons.merchant_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv3_mysql.sql index 9f6e854c1..e7cd13330 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv3_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv3_mysql.sql @@ -1,16 +1,10 @@ -WITH _s1 AS ( - SELECT - merchant_id, - COUNT(*) AS n_rows - FROM main.coupons - GROUP BY - 1 -) SELECT - merchants.name AS merchant_name, - _s1.n_rows AS total_coupons + ANY_VALUE(merchants.name) AS merchant_name, + COUNT(*) AS total_coupons FROM main.merchants AS 
merchants -JOIN _s1 AS _s1 - ON _s1.merchant_id = merchants.mid +JOIN main.coupons AS coupons + ON coupons.merchant_id = merchants.mid WHERE LOWER(merchants.category) LIKE '%retail%' AND merchants.status = 'active' +GROUP BY + coupons.merchant_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv3_postgres.sql index 9f6e854c1..ed5902569 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv3_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv3_postgres.sql @@ -1,16 +1,10 @@ -WITH _s1 AS ( - SELECT - merchant_id, - COUNT(*) AS n_rows - FROM main.coupons - GROUP BY - 1 -) SELECT - merchants.name AS merchant_name, - _s1.n_rows AS total_coupons + MAX(merchants.name) AS merchant_name, + COUNT(*) AS total_coupons FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.merchant_id = merchants.mid +JOIN main.coupons AS coupons + ON coupons.merchant_id = merchants.mid WHERE LOWER(merchants.category) LIKE '%retail%' AND merchants.status = 'active' +GROUP BY + coupons.merchant_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv3_snowflake.sql index 859a0e293..c300a9bda 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv3_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv3_snowflake.sql @@ -1,16 +1,10 @@ -WITH _s1 AS ( - SELECT - merchant_id, - COUNT(*) AS n_rows - FROM main.coupons - GROUP BY - 1 -) SELECT - merchants.name AS merchant_name, - _s1.n_rows AS total_coupons + ANY_VALUE(merchants.name) AS merchant_name, + COUNT(*) AS total_coupons FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.merchant_id = merchants.mid +JOIN main.coupons AS coupons + ON coupons.merchant_id = merchants.mid WHERE CONTAINS(LOWER(merchants.category), 'retail') AND merchants.status = 'active' +GROUP BY + coupons.merchant_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql 
index 9f6e854c1..ed5902569 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql @@ -1,16 +1,10 @@ -WITH _s1 AS ( - SELECT - merchant_id, - COUNT(*) AS n_rows - FROM main.coupons - GROUP BY - 1 -) SELECT - merchants.name AS merchant_name, - _s1.n_rows AS total_coupons + MAX(merchants.name) AS merchant_name, + COUNT(*) AS total_coupons FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.merchant_id = merchants.mid +JOIN main.coupons AS coupons + ON coupons.merchant_id = merchants.mid WHERE LOWER(merchants.category) LIKE '%retail%' AND merchants.status = 'active' +GROUP BY + coupons.merchant_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql index 5b5f3027b..de3b98e6b 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql @@ -1,19 +1,13 @@ -WITH _s1 AS ( - SELECT - receiver_id, - SUM(amount) AS sum_amount - FROM main.wallet_transactions_daily - WHERE - receiver_type = 1 AND status = 'success' - GROUP BY - 1 -) SELECT - merchants.mid AS merchants_id, - merchants.name AS merchants_name, - merchants.category, - COALESCE(_s1.sum_amount, 0) AS total_revenue, - ROW_NUMBER() OVER (ORDER BY COALESCE(_s1.sum_amount, 0) DESC NULLS FIRST) AS mrr + ANY_VALUE(merchants.mid) AS merchants_id, + ANY_VALUE(merchants.name) AS merchants_name, + ANY_VALUE(merchants.category) AS category, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, + ROW_NUMBER() OVER (ORDER BY COALESCE(SUM(wallet_transactions_daily.amount), 0) DESC NULLS FIRST) AS mrr FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.receiver_type = 1 + AND wallet_transactions_daily.status = 'success' +GROUP BY 
+ wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql index 41f5dab89..b341dbba8 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql @@ -1,19 +1,13 @@ -WITH _s1 AS ( - SELECT - receiver_id, - SUM(amount) AS sum_amount - FROM main.wallet_transactions_daily - WHERE - receiver_type = 1 AND status = 'success' - GROUP BY - 1 -) SELECT - merchants.mid AS merchants_id, - merchants.name AS merchants_name, - merchants.category, - COALESCE(_s1.sum_amount, 0) AS total_revenue, - ROW_NUMBER() OVER (ORDER BY COALESCE(_s1.sum_amount, 0) DESC) AS mrr + ANY_VALUE(merchants.mid) AS merchants_id, + ANY_VALUE(merchants.name) AS merchants_name, + ANY_VALUE(merchants.category) AS category, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, + ROW_NUMBER() OVER (ORDER BY COALESCE(SUM(wallet_transactions_daily.amount), 0) DESC) AS mrr FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.receiver_type = 1 + AND wallet_transactions_daily.status = 'success' +GROUP BY + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql index 41f5dab89..4b1d90d67 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql @@ -1,19 +1,13 @@ -WITH _s1 AS ( - SELECT - receiver_id, - SUM(amount) AS sum_amount - FROM main.wallet_transactions_daily - WHERE - receiver_type = 1 AND status = 'success' - GROUP BY - 1 -) SELECT - merchants.mid AS merchants_id, - merchants.name AS merchants_name, - merchants.category, - COALESCE(_s1.sum_amount, 0) AS 
total_revenue, - ROW_NUMBER() OVER (ORDER BY COALESCE(_s1.sum_amount, 0) DESC) AS mrr + MAX(merchants.mid) AS merchants_id, + MAX(merchants.name) AS merchants_name, + MAX(merchants.category) AS category, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, + ROW_NUMBER() OVER (ORDER BY COALESCE(SUM(wallet_transactions_daily.amount), 0) DESC) AS mrr FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.receiver_type = 1 + AND wallet_transactions_daily.status = 'success' +GROUP BY + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql index 41f5dab89..b341dbba8 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql @@ -1,19 +1,13 @@ -WITH _s1 AS ( - SELECT - receiver_id, - SUM(amount) AS sum_amount - FROM main.wallet_transactions_daily - WHERE - receiver_type = 1 AND status = 'success' - GROUP BY - 1 -) SELECT - merchants.mid AS merchants_id, - merchants.name AS merchants_name, - merchants.category, - COALESCE(_s1.sum_amount, 0) AS total_revenue, - ROW_NUMBER() OVER (ORDER BY COALESCE(_s1.sum_amount, 0) DESC) AS mrr + ANY_VALUE(merchants.mid) AS merchants_id, + ANY_VALUE(merchants.name) AS merchants_name, + ANY_VALUE(merchants.category) AS category, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, + ROW_NUMBER() OVER (ORDER BY COALESCE(SUM(wallet_transactions_daily.amount), 0) DESC) AS mrr FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.receiver_type = 1 + AND 
wallet_transactions_daily.status = 'success' +GROUP BY + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql index 41f5dab89..4b1d90d67 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql @@ -1,19 +1,13 @@ -WITH _s1 AS ( - SELECT - receiver_id, - SUM(amount) AS sum_amount - FROM main.wallet_transactions_daily - WHERE - receiver_type = 1 AND status = 'success' - GROUP BY - 1 -) SELECT - merchants.mid AS merchants_id, - merchants.name AS merchants_name, - merchants.category, - COALESCE(_s1.sum_amount, 0) AS total_revenue, - ROW_NUMBER() OVER (ORDER BY COALESCE(_s1.sum_amount, 0) DESC) AS mrr + MAX(merchants.mid) AS merchants_id, + MAX(merchants.name) AS merchants_name, + MAX(merchants.category) AS category, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, + ROW_NUMBER() OVER (ORDER BY COALESCE(SUM(wallet_transactions_daily.amount), 0) DESC) AS mrr FROM main.merchants AS merchants -JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.receiver_type = 1 + AND wallet_transactions_daily.status = 'success' +GROUP BY + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_ansi.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_ansi.sql index 61fe15fea..939a6e155 100644 --- a/tests/test_sql_refsols/nation_acctbal_breakdown_ansi.sql +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_ansi.sql @@ -1,26 +1,16 @@ -WITH _s3 AS ( - SELECT - c_nationkey, - COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS count_negative_acctbal, - COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS count_non_negative_acctbal, - MEDIAN(c_acctbal) AS median_c_acctbal, - 
MEDIAN(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS median_negative_acctbal, - MEDIAN(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS median_non_negative_acctbal - FROM tpch.customer - GROUP BY - 1 -) SELECT - nation.n_name AS nation_name, - _s3.count_negative_acctbal AS n_red_acctbal, - _s3.count_non_negative_acctbal AS n_black_acctbal, - _s3.median_negative_acctbal AS median_red_acctbal, - _s3.median_non_negative_acctbal AS median_black_acctbal, - _s3.median_c_acctbal AS median_overall_acctbal + ANY_VALUE(nation.n_name) AS nation_name, + COUNT(CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) AS n_black_acctbal, + MEDIAN(CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) AS median_red_acctbal, + MEDIAN(CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) AS median_black_acctbal, + MEDIAN(customer.c_acctbal) AS median_overall_acctbal FROM tpch.nation AS nation JOIN tpch.region AS region ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AMERICA' -JOIN _s3 AS _s3 - ON _s3.c_nationkey = nation.n_nationkey +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +GROUP BY + customer.c_nationkey ORDER BY 1 diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_mysql.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_mysql.sql index c4225e2bb..463f75371 100644 --- a/tests/test_sql_refsols/nation_acctbal_breakdown_mysql.sql +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_mysql.sql @@ -1,4 +1,4 @@ -WITH _t2 AS ( +WITH _s3 AS ( SELECT c_acctbal, c_nationkey, @@ -42,29 +42,20 @@ WITH _t2 AS ( ELSE NULL END AS expr_7 FROM tpch.CUSTOMER -), _s3 AS ( - SELECT - c_nationkey, - AVG(expr_5) AS avg_expr_5, - AVG(expr_6) AS avg_expr_6, - AVG(expr_7) AS avg_expr_7, - COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS count_negative_acctbal, 
- COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS count_non_negative_acctbal - FROM _t2 - GROUP BY - 1 ) SELECT - NATION.n_name COLLATE utf8mb4_bin AS nation_name, - _s3.count_negative_acctbal AS n_red_acctbal, - _s3.count_non_negative_acctbal AS n_black_acctbal, - _s3.avg_expr_7 AS median_red_acctbal, - _s3.avg_expr_5 AS median_black_acctbal, - _s3.avg_expr_6 AS median_overall_acctbal + ANY_VALUE(NATION.n_name) COLLATE utf8mb4_bin AS nation_name, + COUNT(CASE WHEN _s3.c_acctbal < 0 THEN _s3.c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN _s3.c_acctbal >= 0 THEN _s3.c_acctbal ELSE NULL END) AS n_black_acctbal, + AVG(_s3.expr_7) AS median_red_acctbal, + AVG(_s3.expr_5) AS median_black_acctbal, + AVG(_s3.expr_6) AS median_overall_acctbal FROM tpch.NATION AS NATION JOIN tpch.REGION AS REGION ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'AMERICA' JOIN _s3 AS _s3 ON NATION.n_nationkey = _s3.c_nationkey +GROUP BY + _s3.c_nationkey ORDER BY 1 diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_postgres.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_postgres.sql index 813210501..6b44688e2 100644 --- a/tests/test_sql_refsols/nation_acctbal_breakdown_postgres.sql +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_postgres.sql @@ -1,29 +1,19 @@ -WITH _s3 AS ( - SELECT - c_nationkey, - COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS count_negative_acctbal, - COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS count_non_negative_acctbal, - PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY - c_acctbal) AS median_c_acctbal, - PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY - CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS median_negative_acctbal, - PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY - CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS median_non_negative_acctbal - FROM tpch.customer - GROUP BY - 1 -) SELECT - nation.n_name AS nation_name, - _s3.count_negative_acctbal AS 
n_red_acctbal, - _s3.count_non_negative_acctbal AS n_black_acctbal, - _s3.median_negative_acctbal AS median_red_acctbal, - _s3.median_non_negative_acctbal AS median_black_acctbal, - _s3.median_c_acctbal AS median_overall_acctbal + MAX(nation.n_name) AS nation_name, + COUNT(CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) AS n_black_acctbal, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY + CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) AS median_red_acctbal, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY + CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) AS median_black_acctbal, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY + customer.c_acctbal) AS median_overall_acctbal FROM tpch.nation AS nation JOIN tpch.region AS region ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AMERICA' -JOIN _s3 AS _s3 - ON _s3.c_nationkey = nation.n_nationkey +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +GROUP BY + customer.c_nationkey ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_snowflake.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_snowflake.sql index 6704d705e..8096e2aa4 100644 --- a/tests/test_sql_refsols/nation_acctbal_breakdown_snowflake.sql +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_snowflake.sql @@ -1,26 +1,16 @@ -WITH _s3 AS ( - SELECT - c_nationkey, - COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS count_negative_acctbal, - COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS count_non_negative_acctbal, - MEDIAN(c_acctbal) AS median_c_acctbal, - MEDIAN(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS median_negative_acctbal, - MEDIAN(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS median_non_negative_acctbal - FROM tpch.customer - GROUP BY - 1 -) SELECT - 
nation.n_name AS nation_name, - _s3.count_negative_acctbal AS n_red_acctbal, - _s3.count_non_negative_acctbal AS n_black_acctbal, - _s3.median_negative_acctbal AS median_red_acctbal, - _s3.median_non_negative_acctbal AS median_black_acctbal, - _s3.median_c_acctbal AS median_overall_acctbal + ANY_VALUE(nation.n_name) AS nation_name, + COUNT(CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) AS n_black_acctbal, + MEDIAN(CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) AS median_red_acctbal, + MEDIAN(CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) AS median_black_acctbal, + MEDIAN(customer.c_acctbal) AS median_overall_acctbal FROM tpch.nation AS nation JOIN tpch.region AS region ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AMERICA' -JOIN _s3 AS _s3 - ON _s3.c_nationkey = nation.n_nationkey +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +GROUP BY + customer.c_nationkey ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql index 46df14df7..822eced2d 100644 --- a/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t2 AS ( +WITH _s3 AS ( SELECT c_acctbal, c_nationkey, @@ -42,29 +42,20 @@ WITH _t2 AS ( ELSE NULL END AS expr_7 FROM tpch.customer -), _s3 AS ( - SELECT - c_nationkey, - AVG(expr_5) AS avg_expr_5, - AVG(expr_6) AS avg_expr_6, - AVG(expr_7) AS avg_expr_7, - COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS count_negative_acctbal, - COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS count_non_negative_acctbal - FROM _t2 - GROUP BY - 1 ) SELECT - nation.n_name AS nation_name, - _s3.count_negative_acctbal AS n_red_acctbal, - 
_s3.count_non_negative_acctbal AS n_black_acctbal, - _s3.avg_expr_7 AS median_red_acctbal, - _s3.avg_expr_5 AS median_black_acctbal, - _s3.avg_expr_6 AS median_overall_acctbal + MAX(nation.n_name) AS nation_name, + COUNT(CASE WHEN _s3.c_acctbal < 0 THEN _s3.c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN _s3.c_acctbal >= 0 THEN _s3.c_acctbal ELSE NULL END) AS n_black_acctbal, + AVG(_s3.expr_7) AS median_red_acctbal, + AVG(_s3.expr_5) AS median_black_acctbal, + AVG(_s3.expr_6) AS median_overall_acctbal FROM tpch.nation AS nation JOIN tpch.region AS region ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AMERICA' JOIN _s3 AS _s3 ON _s3.c_nationkey = nation.n_nationkey +GROUP BY + _s3.c_nationkey ORDER BY 1 diff --git a/tests/test_sql_refsols/simple_var_std_ansi.sql b/tests/test_sql_refsols/simple_var_std_ansi.sql index b477025a8..3db44a93e 100644 --- a/tests/test_sql_refsols/simple_var_std_ansi.sql +++ b/tests/test_sql_refsols/simple_var_std_ansi.sql @@ -1,24 +1,15 @@ -WITH _s1 AS ( - SELECT - s_nationkey, - STDDEV_POP(s_acctbal) AS population_std_s_acctbal, - VARIANCE_POP(s_acctbal) AS population_var_s_acctbal, - STDDEV(s_acctbal) AS sample_std_s_acctbal, - VARIANCE(s_acctbal) AS sample_var_s_acctbal - FROM tpch.supplier - GROUP BY - 1 -) SELECT - nation.n_name AS name, - _s1.population_var_s_acctbal AS var, - _s1.population_std_s_acctbal AS std, - _s1.sample_var_s_acctbal AS sample_var, - _s1.sample_std_s_acctbal AS sample_std, - _s1.population_var_s_acctbal AS pop_var, - _s1.population_std_s_acctbal AS pop_std + ANY_VALUE(nation.n_name) AS name, + VARIANCE_POP(supplier.s_acctbal) AS var, + STDDEV_POP(supplier.s_acctbal) AS std, + VARIANCE(supplier.s_acctbal) AS sample_var, + STDDEV(supplier.s_acctbal) AS sample_std, + VARIANCE_POP(supplier.s_acctbal) AS pop_var, + STDDEV_POP(supplier.s_acctbal) AS pop_std FROM tpch.nation AS nation -JOIN _s1 AS _s1 - ON _s1.s_nationkey = nation.n_nationkey +JOIN tpch.supplier AS supplier + ON 
nation.n_nationkey = supplier.s_nationkey WHERE nation.n_name IN ('ALGERIA', 'ARGENTINA') +GROUP BY + supplier.s_nationkey diff --git a/tests/test_sql_refsols/simple_var_std_mysql.sql b/tests/test_sql_refsols/simple_var_std_mysql.sql index 0f6e38717..197dcc144 100644 --- a/tests/test_sql_refsols/simple_var_std_mysql.sql +++ b/tests/test_sql_refsols/simple_var_std_mysql.sql @@ -1,70 +1,82 @@ -WITH _s1 AS ( - SELECT - s_nationkey, - POWER( +SELECT + ANY_VALUE(NATION.n_name) AS name, + ( + SUM(( + POWER(SUPPLIER.s_acctbal, 2) + )) - ( ( - ( - SUM(( - POWER(s_acctbal, 2) - )) - ( - ( - POWER(SUM(s_acctbal), 2) - ) / COUNT(s_acctbal) - ) - ) / COUNT(s_acctbal) - ), - 0.5 - ) AS population_std_s_acctbal, + POWER(SUM(SUPPLIER.s_acctbal), 2) + ) / COUNT(SUPPLIER.s_acctbal) + ) + ) / COUNT(SUPPLIER.s_acctbal) AS var, + POWER( ( - SUM(( - POWER(s_acctbal, 2) - )) - ( - ( - POWER(SUM(s_acctbal), 2) - ) / COUNT(s_acctbal) - ) - ) / COUNT(s_acctbal) AS population_var_s_acctbal, - POWER( ( - ( - SUM(( - POWER(s_acctbal, 2) - )) - ( - ( - POWER(SUM(s_acctbal), 2) - ) / COUNT(s_acctbal) - ) - ) / ( - COUNT(s_acctbal) - 1 + SUM(( + POWER(SUPPLIER.s_acctbal, 2) + )) - ( + ( + POWER(SUM(SUPPLIER.s_acctbal), 2) + ) / COUNT(SUPPLIER.s_acctbal) ) - ), - 0.5 - ) AS sample_std_s_acctbal, + ) / COUNT(SUPPLIER.s_acctbal) + ), + 0.5 + ) AS std, + ( + SUM(( + POWER(SUPPLIER.s_acctbal, 2) + )) - ( + ( + POWER(SUM(SUPPLIER.s_acctbal), 2) + ) / COUNT(SUPPLIER.s_acctbal) + ) + ) / ( + COUNT(SUPPLIER.s_acctbal) - 1 + ) AS sample_var, + POWER( ( - SUM(( - POWER(s_acctbal, 2) - )) - ( - ( - POWER(SUM(s_acctbal), 2) - ) / COUNT(s_acctbal) + ( + SUM(( + POWER(SUPPLIER.s_acctbal, 2) + )) - ( + ( + POWER(SUM(SUPPLIER.s_acctbal), 2) + ) / COUNT(SUPPLIER.s_acctbal) + ) + ) / ( + COUNT(SUPPLIER.s_acctbal) - 1 ) - ) / ( - COUNT(s_acctbal) - 1 - ) AS sample_var_s_acctbal - FROM tpch.SUPPLIER - GROUP BY - 1 -) -SELECT - NATION.n_name AS name, - _s1.population_var_s_acctbal AS var, - 
_s1.population_std_s_acctbal AS std, - _s1.sample_var_s_acctbal AS sample_var, - _s1.sample_std_s_acctbal AS sample_std, - _s1.population_var_s_acctbal AS pop_var, - _s1.population_std_s_acctbal AS pop_std + ), + 0.5 + ) AS sample_std, + ( + SUM(( + POWER(SUPPLIER.s_acctbal, 2) + )) - ( + ( + POWER(SUM(SUPPLIER.s_acctbal), 2) + ) / COUNT(SUPPLIER.s_acctbal) + ) + ) / COUNT(SUPPLIER.s_acctbal) AS pop_var, + POWER( + ( + ( + SUM(( + POWER(SUPPLIER.s_acctbal, 2) + )) - ( + ( + POWER(SUM(SUPPLIER.s_acctbal), 2) + ) / COUNT(SUPPLIER.s_acctbal) + ) + ) / COUNT(SUPPLIER.s_acctbal) + ), + 0.5 + ) AS pop_std FROM tpch.NATION AS NATION -JOIN _s1 AS _s1 - ON NATION.n_nationkey = _s1.s_nationkey +JOIN tpch.SUPPLIER AS SUPPLIER + ON NATION.n_nationkey = SUPPLIER.s_nationkey WHERE NATION.n_name IN ('ALGERIA', 'ARGENTINA') +GROUP BY + SUPPLIER.s_nationkey diff --git a/tests/test_sql_refsols/simple_var_std_postgres.sql b/tests/test_sql_refsols/simple_var_std_postgres.sql index 47cf6a80a..94419638f 100644 --- a/tests/test_sql_refsols/simple_var_std_postgres.sql +++ b/tests/test_sql_refsols/simple_var_std_postgres.sql @@ -1,24 +1,15 @@ -WITH _s1 AS ( - SELECT - s_nationkey, - STDDEV_POP(s_acctbal) AS population_std_s_acctbal, - VAR_POP(s_acctbal) AS population_var_s_acctbal, - STDDEV(s_acctbal) AS sample_std_s_acctbal, - VAR_SAMP(s_acctbal) AS sample_var_s_acctbal - FROM tpch.supplier - GROUP BY - 1 -) SELECT - nation.n_name AS name, - _s1.population_var_s_acctbal AS var, - _s1.population_std_s_acctbal AS std, - _s1.sample_var_s_acctbal AS sample_var, - _s1.sample_std_s_acctbal AS sample_std, - _s1.population_var_s_acctbal AS pop_var, - _s1.population_std_s_acctbal AS pop_std + MAX(nation.n_name) AS name, + VAR_POP(supplier.s_acctbal) AS var, + STDDEV_POP(supplier.s_acctbal) AS std, + VAR_SAMP(supplier.s_acctbal) AS sample_var, + STDDEV(supplier.s_acctbal) AS sample_std, + VAR_POP(supplier.s_acctbal) AS pop_var, + STDDEV_POP(supplier.s_acctbal) AS pop_std FROM tpch.nation AS nation 
-JOIN _s1 AS _s1 - ON _s1.s_nationkey = nation.n_nationkey +JOIN tpch.supplier AS supplier + ON nation.n_nationkey = supplier.s_nationkey WHERE nation.n_name IN ('ALGERIA', 'ARGENTINA') +GROUP BY + supplier.s_nationkey diff --git a/tests/test_sql_refsols/simple_var_std_snowflake.sql b/tests/test_sql_refsols/simple_var_std_snowflake.sql index b477025a8..3db44a93e 100644 --- a/tests/test_sql_refsols/simple_var_std_snowflake.sql +++ b/tests/test_sql_refsols/simple_var_std_snowflake.sql @@ -1,24 +1,15 @@ -WITH _s1 AS ( - SELECT - s_nationkey, - STDDEV_POP(s_acctbal) AS population_std_s_acctbal, - VARIANCE_POP(s_acctbal) AS population_var_s_acctbal, - STDDEV(s_acctbal) AS sample_std_s_acctbal, - VARIANCE(s_acctbal) AS sample_var_s_acctbal - FROM tpch.supplier - GROUP BY - 1 -) SELECT - nation.n_name AS name, - _s1.population_var_s_acctbal AS var, - _s1.population_std_s_acctbal AS std, - _s1.sample_var_s_acctbal AS sample_var, - _s1.sample_std_s_acctbal AS sample_std, - _s1.population_var_s_acctbal AS pop_var, - _s1.population_std_s_acctbal AS pop_std + ANY_VALUE(nation.n_name) AS name, + VARIANCE_POP(supplier.s_acctbal) AS var, + STDDEV_POP(supplier.s_acctbal) AS std, + VARIANCE(supplier.s_acctbal) AS sample_var, + STDDEV(supplier.s_acctbal) AS sample_std, + VARIANCE_POP(supplier.s_acctbal) AS pop_var, + STDDEV_POP(supplier.s_acctbal) AS pop_std FROM tpch.nation AS nation -JOIN _s1 AS _s1 - ON _s1.s_nationkey = nation.n_nationkey +JOIN tpch.supplier AS supplier + ON nation.n_nationkey = supplier.s_nationkey WHERE nation.n_name IN ('ALGERIA', 'ARGENTINA') +GROUP BY + supplier.s_nationkey diff --git a/tests/test_sql_refsols/simple_var_std_sqlite.sql b/tests/test_sql_refsols/simple_var_std_sqlite.sql index 569265b93..c73ac7bd0 100644 --- a/tests/test_sql_refsols/simple_var_std_sqlite.sql +++ b/tests/test_sql_refsols/simple_var_std_sqlite.sql @@ -1,70 +1,82 @@ -WITH _s1 AS ( - SELECT - s_nationkey, - POWER( - ( - CAST(( - SUM(( - POWER(s_acctbal, 2) - )) - ( - CAST(( - 
POWER(SUM(s_acctbal), 2) - ) AS REAL) / COUNT(s_acctbal) - ) - ) AS REAL) / COUNT(s_acctbal) - ), - 0.5 - ) AS population_std_s_acctbal, - CAST(( - SUM(( - POWER(s_acctbal, 2) - )) - ( - CAST(( - POWER(SUM(s_acctbal), 2) - ) AS REAL) / COUNT(s_acctbal) - ) - ) AS REAL) / COUNT(s_acctbal) AS population_var_s_acctbal, - POWER( - ( - CAST(( - SUM(( - POWER(s_acctbal, 2) - )) - ( - CAST(( - POWER(SUM(s_acctbal), 2) - ) AS REAL) / COUNT(s_acctbal) - ) - ) AS REAL) / ( - COUNT(s_acctbal) - 1 +SELECT + MAX(nation.n_name) AS name, + CAST(( + SUM(( + POWER(supplier.s_acctbal, 2) + )) - ( + CAST(( + POWER(SUM(supplier.s_acctbal), 2) + ) AS REAL) / COUNT(supplier.s_acctbal) + ) + ) AS REAL) / COUNT(supplier.s_acctbal) AS var, + POWER( + ( + CAST(( + SUM(( + POWER(supplier.s_acctbal, 2) + )) - ( + CAST(( + POWER(SUM(supplier.s_acctbal), 2) + ) AS REAL) / COUNT(supplier.s_acctbal) + ) + ) AS REAL) / COUNT(supplier.s_acctbal) + ), + 0.5 + ) AS std, + CAST(( + SUM(( + POWER(supplier.s_acctbal, 2) + )) - ( + CAST(( + POWER(SUM(supplier.s_acctbal), 2) + ) AS REAL) / COUNT(supplier.s_acctbal) + ) + ) AS REAL) / ( + COUNT(supplier.s_acctbal) - 1 + ) AS sample_var, + POWER( + ( + CAST(( + SUM(( + POWER(supplier.s_acctbal, 2) + )) - ( + CAST(( + POWER(SUM(supplier.s_acctbal), 2) + ) AS REAL) / COUNT(supplier.s_acctbal) ) - ), - 0.5 - ) AS sample_std_s_acctbal, - CAST(( - SUM(( - POWER(s_acctbal, 2) - )) - ( - CAST(( - POWER(SUM(s_acctbal), 2) - ) AS REAL) / COUNT(s_acctbal) + ) AS REAL) / ( + COUNT(supplier.s_acctbal) - 1 ) - ) AS REAL) / ( - COUNT(s_acctbal) - 1 - ) AS sample_var_s_acctbal - FROM tpch.supplier - GROUP BY - 1 -) -SELECT - nation.n_name AS name, - _s1.population_var_s_acctbal AS var, - _s1.population_std_s_acctbal AS std, - _s1.sample_var_s_acctbal AS sample_var, - _s1.sample_std_s_acctbal AS sample_std, - _s1.population_var_s_acctbal AS pop_var, - _s1.population_std_s_acctbal AS pop_std + ), + 0.5 + ) AS sample_std, + CAST(( + SUM(( + POWER(supplier.s_acctbal, 2) + )) 
- ( + CAST(( + POWER(SUM(supplier.s_acctbal), 2) + ) AS REAL) / COUNT(supplier.s_acctbal) + ) + ) AS REAL) / COUNT(supplier.s_acctbal) AS pop_var, + POWER( + ( + CAST(( + SUM(( + POWER(supplier.s_acctbal, 2) + )) - ( + CAST(( + POWER(SUM(supplier.s_acctbal), 2) + ) AS REAL) / COUNT(supplier.s_acctbal) + ) + ) AS REAL) / COUNT(supplier.s_acctbal) + ), + 0.5 + ) AS pop_std FROM tpch.nation AS nation -JOIN _s1 AS _s1 - ON _s1.s_nationkey = nation.n_nationkey +JOIN tpch.supplier AS supplier + ON nation.n_nationkey = supplier.s_nationkey WHERE nation.n_name IN ('ALGERIA', 'ARGENTINA') +GROUP BY + supplier.s_nationkey diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 1cf44ebb8..1b5d6ec54 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -1,39 +1,31 @@ -WITH _s3 AS ( +WITH _s5 AS ( SELECT - l_partkey, - SUM(l_quantity) AS sum_l_quantity - FROM tpch.lineitem - WHERE - EXTRACT(YEAR FROM CAST(l_shipdate AS DATETIME)) = 1994 - GROUP BY - 1 -), _s5 AS ( - SELECT - part.p_partkey, - _s3.sum_l_quantity + ANY_VALUE(part.p_partkey) AS anything_p_partkey, + SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part - JOIN _s3 AS _s3 - ON _s3.l_partkey = part.p_partkey + JOIN tpch.lineitem AS lineitem + ON EXTRACT(YEAR FROM CAST(lineitem.l_shipdate AS DATETIME)) = 1994 + AND lineitem.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' -), _s7 AS ( - SELECT DISTINCT - partsupp.ps_suppkey - FROM tpch.partsupp AS partsupp - JOIN _s5 AS _s5 - ON _s5.p_partkey = partsupp.ps_partkey - AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.sum_l_quantity, 0) - ) + GROUP BY + lineitem.l_partkey ) SELECT - supplier.s_name AS S_NAME, - supplier.s_address AS S_ADDRESS + ANY_VALUE(supplier.s_name) AS S_NAME, + ANY_VALUE(supplier.s_address) AS S_ADDRESS FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = 
supplier.s_nationkey -JOIN _s7 AS _s7 - ON _s7.ps_suppkey = supplier.s_suppkey +JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey +JOIN _s5 AS _s5 + ON _s5.anything_p_partkey = partsupp.ps_partkey + AND partsupp.ps_availqty > ( + 0.5 * COALESCE(_s5.sum_l_quantity, 0) + ) +GROUP BY + partsupp.ps_suppkey ORDER BY 1 LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q20_mysql.sql b/tests/test_sql_refsols/tpch_q20_mysql.sql index 7705ccd05..284ef586d 100644 --- a/tests/test_sql_refsols/tpch_q20_mysql.sql +++ b/tests/test_sql_refsols/tpch_q20_mysql.sql @@ -1,39 +1,31 @@ -WITH _s3 AS ( +WITH _s5 AS ( SELECT - l_partkey, - SUM(l_quantity) AS sum_l_quantity - FROM tpch.LINEITEM - WHERE - EXTRACT(YEAR FROM CAST(l_shipdate AS DATETIME)) = 1994 - GROUP BY - 1 -), _s5 AS ( - SELECT - PART.p_partkey, - _s3.sum_l_quantity + ANY_VALUE(PART.p_partkey) AS anything_p_partkey, + SUM(LINEITEM.l_quantity) AS sum_l_quantity FROM tpch.PART AS PART - JOIN _s3 AS _s3 - ON PART.p_partkey = _s3.l_partkey + JOIN tpch.LINEITEM AS LINEITEM + ON EXTRACT(YEAR FROM CAST(LINEITEM.l_shipdate AS DATETIME)) = 1994 + AND LINEITEM.l_partkey = PART.p_partkey WHERE PART.p_name LIKE 'forest%' -), _s7 AS ( - SELECT DISTINCT - PARTSUPP.ps_suppkey - FROM tpch.PARTSUPP AS PARTSUPP - JOIN _s5 AS _s5 - ON PARTSUPP.ps_availqty > ( - 0.5 * COALESCE(_s5.sum_l_quantity, 0) - ) - AND PARTSUPP.ps_partkey = _s5.p_partkey + GROUP BY + LINEITEM.l_partkey ) SELECT - SUPPLIER.s_name COLLATE utf8mb4_bin AS S_NAME, - SUPPLIER.s_address AS S_ADDRESS + ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, + ANY_VALUE(SUPPLIER.s_address) AS S_ADDRESS FROM tpch.SUPPLIER AS SUPPLIER JOIN tpch.NATION AS NATION ON NATION.n_name = 'CANADA' AND NATION.n_nationkey = SUPPLIER.s_nationkey -JOIN _s7 AS _s7 - ON SUPPLIER.s_suppkey = _s7.ps_suppkey +JOIN tpch.PARTSUPP AS PARTSUPP + ON PARTSUPP.ps_suppkey = SUPPLIER.s_suppkey +JOIN _s5 AS _s5 + ON PARTSUPP.ps_availqty > ( + 0.5 * COALESCE(_s5.sum_l_quantity, 0) 
+ ) + AND PARTSUPP.ps_partkey = _s5.anything_p_partkey +GROUP BY + PARTSUPP.ps_suppkey ORDER BY 1 LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q20_postgres.sql b/tests/test_sql_refsols/tpch_q20_postgres.sql index bfd924a91..3f261a9bf 100644 --- a/tests/test_sql_refsols/tpch_q20_postgres.sql +++ b/tests/test_sql_refsols/tpch_q20_postgres.sql @@ -1,39 +1,31 @@ -WITH _s3 AS ( +WITH _s5 AS ( SELECT - l_partkey, - SUM(l_quantity) AS sum_l_quantity - FROM tpch.lineitem - WHERE - EXTRACT(YEAR FROM CAST(l_shipdate AS TIMESTAMP)) = 1994 - GROUP BY - 1 -), _s5 AS ( - SELECT - part.p_partkey, - _s3.sum_l_quantity + MAX(part.p_partkey) AS anything_p_partkey, + SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part - JOIN _s3 AS _s3 - ON _s3.l_partkey = part.p_partkey + JOIN tpch.lineitem AS lineitem + ON EXTRACT(YEAR FROM CAST(lineitem.l_shipdate AS TIMESTAMP)) = 1994 + AND lineitem.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' -), _s7 AS ( - SELECT DISTINCT - partsupp.ps_suppkey - FROM tpch.partsupp AS partsupp - JOIN _s5 AS _s5 - ON _s5.p_partkey = partsupp.ps_partkey - AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.sum_l_quantity, 0) - ) + GROUP BY + lineitem.l_partkey ) SELECT - supplier.s_name AS S_NAME, - supplier.s_address AS S_ADDRESS + MAX(supplier.s_name) AS S_NAME, + MAX(supplier.s_address) AS S_ADDRESS FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _s7 AS _s7 - ON _s7.ps_suppkey = supplier.s_suppkey +JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey +JOIN _s5 AS _s5 + ON _s5.anything_p_partkey = partsupp.ps_partkey + AND partsupp.ps_availqty > ( + 0.5 * COALESCE(_s5.sum_l_quantity, 0) + ) +GROUP BY + partsupp.ps_suppkey ORDER BY 1 NULLS FIRST LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q20_snowflake.sql b/tests/test_sql_refsols/tpch_q20_snowflake.sql index 22d85f1d2..cc2d0c469 100644 --- 
a/tests/test_sql_refsols/tpch_q20_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q20_snowflake.sql @@ -1,39 +1,31 @@ -WITH _s3 AS ( +WITH _s5 AS ( SELECT - l_partkey, - SUM(l_quantity) AS sum_l_quantity - FROM tpch.lineitem - WHERE - YEAR(CAST(l_shipdate AS TIMESTAMP)) = 1994 - GROUP BY - 1 -), _s5 AS ( - SELECT - part.p_partkey, - _s3.sum_l_quantity + ANY_VALUE(part.p_partkey) AS anything_p_partkey, + SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part - JOIN _s3 AS _s3 - ON _s3.l_partkey = part.p_partkey + JOIN tpch.lineitem AS lineitem + ON YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 1994 + AND lineitem.l_partkey = part.p_partkey WHERE STARTSWITH(part.p_name, 'forest') -), _s7 AS ( - SELECT DISTINCT - partsupp.ps_suppkey - FROM tpch.partsupp AS partsupp - JOIN _s5 AS _s5 - ON _s5.p_partkey = partsupp.ps_partkey - AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.sum_l_quantity, 0) - ) + GROUP BY + lineitem.l_partkey ) SELECT - supplier.s_name AS S_NAME, - supplier.s_address AS S_ADDRESS + ANY_VALUE(supplier.s_name) AS S_NAME, + ANY_VALUE(supplier.s_address) AS S_ADDRESS FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _s7 AS _s7 - ON _s7.ps_suppkey = supplier.s_suppkey +JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey +JOIN _s5 AS _s5 + ON _s5.anything_p_partkey = partsupp.ps_partkey + AND partsupp.ps_availqty > ( + 0.5 * COALESCE(_s5.sum_l_quantity, 0) + ) +GROUP BY + partsupp.ps_suppkey ORDER BY 1 NULLS FIRST LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index c553dc004..46f3cd2d7 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -1,39 +1,31 @@ -WITH _s3 AS ( +WITH _s5 AS ( SELECT - l_partkey, - SUM(l_quantity) AS sum_l_quantity - FROM tpch.lineitem - WHERE - CAST(STRFTIME('%Y', l_shipdate) AS INTEGER) = 
1994 - GROUP BY - 1 -), _s5 AS ( - SELECT - part.p_partkey, - _s3.sum_l_quantity + MAX(part.p_partkey) AS anything_p_partkey, + SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part - JOIN _s3 AS _s3 - ON _s3.l_partkey = part.p_partkey + JOIN tpch.lineitem AS lineitem + ON CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1994 + AND lineitem.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' -), _s7 AS ( - SELECT DISTINCT - partsupp.ps_suppkey - FROM tpch.partsupp AS partsupp - JOIN _s5 AS _s5 - ON _s5.p_partkey = partsupp.ps_partkey - AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.sum_l_quantity, 0) - ) + GROUP BY + lineitem.l_partkey ) SELECT - supplier.s_name AS S_NAME, - supplier.s_address AS S_ADDRESS + MAX(supplier.s_name) AS S_NAME, + MAX(supplier.s_address) AS S_ADDRESS FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _s7 AS _s7 - ON _s7.ps_suppkey = supplier.s_suppkey +JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey +JOIN _s5 AS _s5 + ON _s5.anything_p_partkey = partsupp.ps_partkey + AND partsupp.ps_availqty > ( + 0.5 * COALESCE(_s5.sum_l_quantity, 0) + ) +GROUP BY + partsupp.ps_suppkey ORDER BY 1 LIMIT 10 From 2d22b2a6b0f62350dadbdc14affcb59699f74a1f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 15:16:32 -0400 Subject: [PATCH 101/143] Fixing key rename bug --- .../conversion/join_aggregate_transpose.py | 186 +++++++----------- tests/test_plan_refsols/correl_29.txt | 26 +-- tests/test_plan_refsols/correl_30.txt | 26 +-- tests/test_plan_refsols/correl_6.txt | 2 +- .../multi_partition_access_2.txt | 34 ++-- .../multi_partition_access_3.txt | 14 +- .../multi_partition_access_4.txt | 10 +- .../multi_partition_access_6.txt | 28 +-- tests/test_sql_refsols/correl_29_sqlite.sql | 66 +++---- tests/test_sql_refsols/correl_30_sqlite.sql | 35 ++-- tests/test_sql_refsols/correl_6_sqlite.sql | 23 +-- 
.../defog_dermtreatment_basic4_ansi.sql | 27 +-- .../defog_dermtreatment_basic4_mysql.sql | 27 +-- .../defog_dermtreatment_basic4_postgres.sql | 27 +-- .../defog_dermtreatment_basic4_snowflake.sql | 27 +-- .../defog_dermtreatment_basic4_sqlite.sql | 27 +-- 16 files changed, 265 insertions(+), 320 deletions(-) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index c6a0504ef..07d0921fe 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -20,7 +20,7 @@ RelationalRoot, RelationalShuttle, ) -from pydough.relational.rel_util import apply_substitution +from pydough.relational.rel_util import add_input_name, apply_substitution class JoinAggregateTransposeShuttle(RelationalShuttle): @@ -153,9 +153,9 @@ def join_aggregate_transpose( # expression will be in the output columns of the new aggregate new_join_columns: dict[str, RelationalExpression] = {} - new_aggregate_keys: dict[str, RelationalExpression] = dict(aggregate.keys) - new_aggregate_aggs: dict[str, CallExpression] = dict(aggregate.aggregations) - new_agg_names: set[str] = set(aggregate.keys) | set(aggregate.aggregations) + new_aggregate_keys: dict[str, RelationalExpression] = {} + new_aggregate_aggs: dict[str, CallExpression] = {} + new_agg_names: set[str] = set() agg_input: RelationalNode = aggregate.inputs[0] non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0] @@ -163,50 +163,10 @@ def join_aggregate_transpose( [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] ) - # Ensure all of the aggregate keys are column references - key_columns: dict[str, RelationalExpression] = {} - if any( - not isinstance(expr, ColumnReference) for expr in aggregate.keys.values() - ): - agg_input_project: dict[str, RelationalExpression] = {} - for col_name, col_expr in agg_input.columns.items(): - agg_input_project[col_name] = ColumnReference( - col_name, 
col_expr.data_type - ) - for key_name, key_expr in aggregate.keys.items(): - if not isinstance(key_expr, ColumnReference): - new_key_name: str = self.generate_name(key_name, agg_input_project) - agg_input_project[new_key_name] = key_expr - key_columns[key_name] = ColumnReference( - new_key_name, key_expr.data_type - ) - else: - key_columns[key_name] = key_expr - agg_input = Project(agg_input, agg_input_project) - else: - key_columns.update(aggregate.keys) - - join_reverse_map: dict[RelationalExpression, set[str]] = {} - for col_name, expr in join.columns.items(): - if expr not in join_reverse_map: - join_reverse_map[expr] = set() - join_reverse_map[expr].add(col_name) - - agg_reverse_map: dict[RelationalExpression, set[str]] = {} - for name, expr in aggregate.columns.items(): - ref_expr: ColumnReference = ColumnReference(name, expr.data_type, agg_alias) - if ref_expr in join_reverse_map: - if expr not in agg_reverse_map: - agg_reverse_map[expr] = set() - agg_reverse_map[expr].update(join_reverse_map[ref_expr]) - project_columns: dict[str, RelationalExpression] = {} - for col_name, col_expr in aggregate.columns.items(): - for proj_name in agg_reverse_map.get(col_expr, []): - project_columns[proj_name] = ColumnReference( - col_name, col_expr.data_type - ) + # Identify the new cardinality of the join if the aggregate is no longer + # happening before the join. 
new_cardinality: JoinCardinality = join.cardinality new_reverse_cardinality: JoinCardinality = join.reverse_cardinality if is_left: @@ -214,73 +174,70 @@ def join_aggregate_transpose( else: new_cardinality = new_cardinality.add_plural() - join_substitutions: dict[RelationalExpression, RelationalExpression] = {} + join_name: str + agg_name: str - agg_input_mapping: dict[str, str] = {} - for col_name, col_expr in agg_input.columns.items(): - new_join_columns[col_name] = ColumnReference( - col_name, col_expr.data_type, agg_alias - ) - agg_input_mapping[col_name] = col_name - - for col_name, col_expr in non_agg_input.columns.items(): - new_col_name: str = col_name - if new_col_name in new_join_columns: - new_col_name = self.generate_name(col_name, new_join_columns) - assert col_name not in new_join_columns - new_join_columns[new_col_name] = ColumnReference( - col_name, col_expr.data_type, non_agg_alias - ) - agg_input_mapping[col_name] = new_col_name - - agg_col_name: str = new_col_name - if agg_col_name in new_agg_names: - agg_col_name = self.generate_name(new_col_name, new_agg_names) - new_aggregate_aggs[agg_col_name] = CallExpression( - pydop.ANYTHING, - col_expr.data_type, - [ColumnReference(new_col_name, col_expr.data_type)], - ) - new_agg_names.add(agg_col_name) - non_ref: ColumnReference = ColumnReference( - col_name, col_expr.data_type, non_agg_alias - ) - for proj_name in join_reverse_map.get(non_ref, []): - project_columns[proj_name] = ColumnReference( - agg_col_name, col_expr.data_type - ) + agg_columns_remapped: dict[RelationalExpression, RelationalExpression] = {} + join_sub: dict[RelationalExpression, RelationalExpression] = {} - # TODO: POPULATE JOIN_SUBSTITUTIONS - - # TODO: - # Build join with every column from both inputs - # build mapping of each column in the two inputs to its new name - - # agg_key_substitution: dict[RelationalExpression, RelationalExpression] = {} - # for key_name, key_expr in aggregate.keys.items(): - # new_key_expr: 
RelationalExpression = add_input_name(key_expr, agg_alias) - # new_key_ref: ColumnReference = ColumnReference(key_name, key_expr.data_type) - # new_join_columns[key_name] = new_key_expr - # new_aggregate_keys[key_name] = new_key_ref - # if new_key_ref in agg_reverse_map: - # for col_name in agg_reverse_map[new_key_ref]: - # project_columns[col_name] = new_key_ref - # if isinstance(key_expr, ColumnReference) and key_expr.name == key_name: - # continue - # sided_key: RelationalExpression = ColumnReference( - # key_name, key_expr.data_type, agg_alias - # ) - # agg_key_substitution[sided_key] = new_key_expr - - # for agg_name, agg_call in aggregate.aggregations.items(): - - new_condition: RelationalExpression = apply_substitution( - join.condition, join_substitutions, {} - ) + for col_name, col_expr in join.columns.items(): + assert isinstance(col_expr, ColumnReference) + join_name = self.generate_name(col_name, new_join_columns) + agg_name = self.generate_name(col_name, new_agg_names) + if col_expr.input_name == agg_alias: + if col_expr.name in aggregate.keys: + new_join_columns[join_name] = add_input_name( + aggregate.keys[col_expr.name], agg_alias + ) + new_aggregate_keys[agg_name] = ColumnReference( + join_name, col_expr.data_type + ) + agg_columns_remapped[aggregate.keys[col_expr.name]] = ( + ColumnReference(join_name, col_expr.data_type) + ) + else: + sub_agg_name: str + current_agg: CallExpression = aggregate.aggregations[col_expr.name] + for arg in current_agg.inputs: + sub_agg_name = self.generate_name("expr", new_join_columns) + new_join_columns[sub_agg_name] = add_input_name(arg, agg_alias) + agg_columns_remapped[arg] = ColumnReference( + sub_agg_name, arg.data_type + ) + new_call = apply_substitution( + aggregate.aggregations[col_expr.name], agg_columns_remapped, {} + ) + assert isinstance(new_call, CallExpression) + new_aggregate_aggs[agg_name] = new_call + new_agg_names.add(agg_name) + else: + new_join_columns[join_name] = ColumnReference( + 
col_expr.name, col_expr.data_type, non_agg_alias + ) + new_aggregate_aggs[agg_name] = CallExpression( + pydop.ANYTHING, + col_expr.data_type, + [ColumnReference(join_name, col_expr.data_type)], + ) + new_agg_names.add(agg_name) + project_columns[col_name] = ColumnReference(agg_name, col_expr.data_type) + + for agg_key_name, agg_key_expr in aggregate.keys.items(): + if agg_key_name not in new_aggregate_keys: + join_name = self.generate_name(agg_key_name, new_join_columns) + agg_name = self.generate_name(agg_key_name, new_agg_names) + new_join_columns[join_name] = add_input_name(agg_key_expr, agg_alias) + new_aggregate_keys[agg_name] = ColumnReference( + join_name, agg_key_expr.data_type + ) + new_agg_names.add(agg_name) + join_sub[ + ColumnReference(agg_key_name, agg_key_expr.data_type, agg_alias) + ] = new_join_columns[join_name] new_join: Join = Join( new_join_inputs, - new_condition, + apply_substitution(join.condition, join_sub, {}), join.join_type, new_join_columns, new_cardinality, @@ -294,17 +251,16 @@ def join_aggregate_transpose( new_project: Project = Project(new_aggregate, project_columns) - print() - print(join.to_tree_string()) + # print() + # print(join.to_tree_string()) - print() - print(new_join.to_tree_string()) + # print() + # print(new_join.to_tree_string()) - print() - print(new_project.to_tree_string()) + # print() + # print(new_project.to_tree_string()) # breakpoint() - # assert False return new_project diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 84faa88d0..2cb18dfd3 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,17 +1,17 @@ -ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_14), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - 
JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_3_14': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) - FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal)}) +ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_name', anything_anything_anything_n_name), ('n_above_avg_customers', anything_anything_n_rows), ('n_above_avg_suppliers', agg_3_14), 
('min_cust_acctbal', anything_min_c_acctbal), ('max_cust_acctbal', anything_max_c_acctbal)], orderings=[(anything_anything_anything_n_regionkey):asc_first, (anything_anything_anything_n_name):asc_first]) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) + JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'anything_anything_n_regionkey': t0.anything_anything_n_regionkey, 'anything_n_rows': t0.anything_n_rows, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_nationkey': t1.n_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) + FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 
'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt 
index 0912d4959..a9718cf64 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_above_avg_suppliers)], orderings=[(anything_lower_r_name):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_lower_r_name': t0.anything_lower_r_name, 'anything_n_name': t0.anything_n_name, 'n_above_avg_suppliers': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_lower_r_name': ANYTHING(LOWER(r_name)), 'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT()}) +ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', anything_n_rows)], orderings=[(anything_lower_r_name):asc_first, (anything_n_name):asc_first]) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_lower_r_name': ANYTHING(LOWER(r_name)), 'anything_n_name': ANYTHING(n_name), 'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': 
t1.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -10,13 +10,13 @@ ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + 
AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_6.txt b/tests/test_plan_refsols/correl_6.txt index f3f3f98dc..a1be82d8f 100644 --- a/tests/test_plan_refsols/correl_6.txt +++ b/tests/test_plan_refsols/correl_6.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', anything_r_name), ('n_prefix_nations', n_rows)], orderings=[]) AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'anything_r_name': ANYTHING(r_name), 'n_rows': COUNT()}) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 132528e7a..adc747c7b 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,30 +1,30 @@ -ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', sum_sum_sbTxShares_1 / sum_count_sbTxShares_1), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': 
t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares_1 / t0.sum_count_sbTxShares_1 & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t1.sum_sum_sbTxShares}) +ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', sum_anything_sum_sbTxShares / 
sum_anything_count_sbTxShares), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) + JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_anything_sum_sbTxShares / t0.sum_anything_count_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_anything_count_sbTxShares': t1.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t1.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - 
AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) + JOIN(condition=t0.anything_customer_id_9 == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t0.anything_sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId_0': sbTxTickerId_0}, aggregations={'anything_customer_id_9': ANYTHING(customer_id_9), 'anything_sbTxTickerId': ANYTHING(sbTxTickerId), 'anything_sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxTickerId_0': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_anything_count_sbTxShares': t1.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t1.sum_anything_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_sbTxShares': t1.sum_sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_anything_count_sbTxShares': SUM(anything_count_sbTxShares), 'sum_anything_sum_sbTxShares': SUM(anything_sum_sbTxShares)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'anything_count_sbTxShares': ANYTHING(count_sbTxShares), 'anything_sum_sbTxShares': ANYTHING(sum_sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_sbTxShares': t1.sum_sbTxShares}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'avg_sbTxShares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index 3203eb669..bcd3fedcb 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,12 +1,12 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) - JOIN(condition=t1.sbDpClose < t0.max_sbDpClose & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) - AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'max_sbDpClose': MAX(sbDpClose)}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) - AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) + JOIN(condition=t1.sbDpClose < t0.max_anything_sbDpClose & t0.anything_sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) + AGGREGATE(keys={'anything_sbTickerType': anything_sbTickerType}, aggregations={'max_anything_sbDpClose': MAX(anything_sbDpClose)}) + AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'anything_sbDpClose': ANYTHING(sbDpClose), 'anything_sbTickerType': ANYTHING(sbTickerType)}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t0.sbDpTickerId, 'sbTickerType': t1.sbTickerType}) SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) - SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) + SCAN(table=main.sbTicker, 
columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) + SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.max_sbDpClose, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'max_sbDpClose': MAX(sbDpClose)}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index f7be3c1ff..ac8d02fb6 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'max_max_sbTxShares': t0.max_sbTxShares, 'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) + JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.anything_sbTxCustId == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.anything_max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) + AGGREGATE(keys={'sbTxCustId_0': sbTxCustId_0}, 
aggregations={'anything_max_sbTxShares': ANYTHING(max_sbTxShares), 'anything_sbTxCustId': ANYTHING(sbTxCustId), 'anything_sbTxTickerId': ANYTHING(sbTxTickerId), 'max_max_sbTxShares': MAX(sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxCustId_0': t0.sbTxCustId, 'sbTxShares': t0.sbTxShares, 'sbTxTickerId': t1.sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 404f488d8..f7361f00d 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -1,20 +1,20 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t1.n_rows == 1:numeric | t0.n_rows == 1:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'sbTxCustId': 
t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_rows > 1:numeric, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + FILTER(condition=n_rows > 1:numeric, columns={'sbTxCustId': sbTxCustId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t0.n_rows, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_rows > 1:numeric, columns={'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_rows > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, 
aggregations={'n_rows': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + FILTER(condition=n_rows > 1:numeric, columns={'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + FILTER(condition=n_rows > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_sql_refsols/correl_29_sqlite.sql b/tests/test_sql_refsols/correl_29_sqlite.sql index 04510c07f..8e3599f4b 100644 --- a/tests/test_sql_refsols/correl_29_sqlite.sql +++ b/tests/test_sql_refsols/correl_29_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t5 AS ( SELECT c_acctbal, c_nationkey @@ -7,10 +7,10 @@ WITH _t3 AS ( SELECT c_nationkey, AVG(c_acctbal) AS avg_c_acctbal - FROM _t3 + FROM _t5 GROUP BY 1 -), _t1 AS ( +), _t3 AS ( SELECT nation.n_nationkey, MAX(nation.n_name) AS anything_n_name, @@ -19,18 +19,25 @@ WITH _t3 AS ( FROM tpch.nation AS nation JOIN _s1 AS _s1 ON _s1.c_nationkey = 
nation.n_nationkey - JOIN _t3 AS _s3 + JOIN _t5 AS _s3 ON _s1.avg_c_acctbal < _s3.c_acctbal AND _s3.c_nationkey = nation.n_nationkey GROUP BY 1 -), _s5 AS ( +), _s10 AS ( SELECT - c_nationkey, - MAX(c_acctbal) AS max_c_acctbal, - MIN(c_acctbal) AS min_c_acctbal - FROM _t3 + MAX(_t3.anything_n_name) AS anything_anything_n_name, + MAX(_t3.anything_n_regionkey) AS anything_anything_n_regionkey, + MAX(_t3.n_nationkey) AS anything_n_nationkey, + MAX(_t3.n_rows) AS anything_n_rows, + MAX(_s5.c_acctbal) AS max_c_acctbal, + MIN(_s5.c_acctbal) AS min_c_acctbal + FROM _t3 AS _t3 + JOIN _t5 AS _s5 + ON _s5.c_nationkey = _t3.n_nationkey + WHERE + _t3.anything_n_regionkey IN (1, 3) GROUP BY - 1 + _s5.c_nationkey ), _t6 AS ( SELECT s_acctbal, @@ -43,32 +50,23 @@ WITH _t3 AS ( FROM _t6 GROUP BY 1 -), _s11 AS ( - SELECT - nation.n_nationkey, - COUNT(*) AS n_rows - FROM tpch.nation AS nation - JOIN _s7 AS _s7 - ON _s7.s_nationkey = nation.n_nationkey - JOIN _t6 AS _s9 - ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey - GROUP BY - 1 ) SELECT - _t1.anything_n_regionkey AS region_key, - _t1.anything_n_name AS nation_name, - _t1.n_rows AS n_above_avg_customers, - _s11.n_rows AS n_above_avg_suppliers, - _s5.min_c_acctbal AS min_cust_acctbal, - _s5.max_c_acctbal AS max_cust_acctbal -FROM _t1 AS _t1 -JOIN _s5 AS _s5 - ON _s5.c_nationkey = _t1.n_nationkey -JOIN _s11 AS _s11 - ON _s11.n_nationkey = _t1.n_nationkey -WHERE - _t1.anything_n_regionkey IN (1, 3) + MAX(_s10.anything_anything_n_regionkey) AS region_key, + MAX(_s10.anything_anything_n_name) AS nation_name, + MAX(_s10.anything_n_rows) AS n_above_avg_customers, + COUNT(*) AS n_above_avg_suppliers, + MAX(_s10.min_c_acctbal) AS min_cust_acctbal, + MAX(_s10.max_c_acctbal) AS max_cust_acctbal +FROM _s10 AS _s10 +JOIN tpch.nation AS nation + ON _s10.anything_n_nationkey = nation.n_nationkey +JOIN _s7 AS _s7 + ON _s7.s_nationkey = nation.n_nationkey +JOIN _t6 AS _s9 + ON _s7.avg_s_acctbal < _s9.s_acctbal AND 
_s9.s_nationkey = nation.n_nationkey +GROUP BY + nation.n_nationkey ORDER BY 1, 2 diff --git a/tests/test_sql_refsols/correl_30_sqlite.sql b/tests/test_sql_refsols/correl_30_sqlite.sql index e0e7d6846..e94f9a71c 100644 --- a/tests/test_sql_refsols/correl_30_sqlite.sql +++ b/tests/test_sql_refsols/correl_30_sqlite.sql @@ -17,21 +17,6 @@ WITH _t2 AS ( FROM tpch.region WHERE NOT r_name IN ('MIDDLE EAST', 'AFRICA', 'ASIA') -), _s12 AS ( - SELECT - nation.n_nationkey, - MAX(LOWER(_t3.r_name)) AS anything_lower_r_name, - MAX(nation.n_name) AS anything_n_name, - COUNT(*) AS n_rows - FROM tpch.nation AS nation - JOIN _s1 AS _s1 - ON _s1.c_nationkey = nation.n_nationkey - JOIN _t3 AS _t3 - ON _t3.r_regionkey = nation.n_regionkey - JOIN _t2 AS _s5 - ON _s1.avg_c_acctbal < _s5.c_acctbal AND _s5.c_nationkey = nation.n_nationkey - GROUP BY - 1 ), _t5 AS ( SELECT s_acctbal, @@ -59,13 +44,21 @@ WITH _t2 AS ( 1 ) SELECT - _s12.anything_lower_r_name AS region_name, - _s12.anything_n_name AS nation_name, - _s12.n_rows AS n_above_avg_customers, - _s13.n_rows AS n_above_avg_suppliers -FROM _s12 AS _s12 + MAX(LOWER(_t3.r_name)) AS region_name, + MAX(nation.n_name) AS nation_name, + COUNT(*) AS n_above_avg_customers, + MAX(_s13.n_rows) AS n_above_avg_suppliers +FROM tpch.nation AS nation +JOIN _s1 AS _s1 + ON _s1.c_nationkey = nation.n_nationkey +JOIN _t3 AS _t3 + ON _t3.r_regionkey = nation.n_regionkey +JOIN _t2 AS _s5 + ON _s1.avg_c_acctbal < _s5.c_acctbal AND _s5.c_nationkey = nation.n_nationkey JOIN _s13 AS _s13 - ON _s12.n_nationkey = _s13.n_nationkey + ON _s13.n_nationkey = nation.n_nationkey +GROUP BY + nation.n_nationkey ORDER BY 1, 2 diff --git a/tests/test_sql_refsols/correl_6_sqlite.sql b/tests/test_sql_refsols/correl_6_sqlite.sql index 9e8c22fc2..d6dd965e6 100644 --- a/tests/test_sql_refsols/correl_6_sqlite.sql +++ b/tests/test_sql_refsols/correl_6_sqlite.sql @@ -1,17 +1,10 @@ -WITH _s1 AS ( - SELECT - SUBSTRING(n_name, 1, 1) AS expr_1, - n_regionkey, - COUNT(*) AS n_rows - 
FROM tpch.nation - GROUP BY - 1, - 2 -) SELECT - region.r_name AS name, - _s1.n_rows AS n_prefix_nations + MAX(region.r_name) AS name, + COUNT(*) AS n_prefix_nations FROM tpch.region AS region -JOIN _s1 AS _s1 - ON _s1.expr_1 = SUBSTRING(region.r_name, 1, 1) - AND _s1.n_regionkey = region.r_regionkey +JOIN tpch.nation AS nation + ON SUBSTRING(nation.n_name, 1, 1) = SUBSTRING(region.r_name, 1, 1) + AND nation.n_regionkey = region.r_regionkey +GROUP BY + nation.n_regionkey, + SUBSTRING(nation.n_name, 1, 1) diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic4_ansi.sql b/tests/test_sql_refsols/defog_dermtreatment_basic4_ansi.sql index 109955774..028a298d0 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic4_ansi.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic4_ansi.sql @@ -1,28 +1,29 @@ -WITH _s1 AS ( +WITH _t1 AS ( SELECT - treatment_id, - MAX(day100_itch_vas) AS max_day100_itch_vas - FROM main.outcomes + ANY_VALUE(treatments.diag_id) AS anything_diag_id, + ANY_VALUE(treatments.patient_id) AS anything_patient_id, + MAX(outcomes.day100_itch_vas) AS max_day100_itch_vas + FROM main.treatments AS treatments + JOIN main.outcomes AS outcomes + ON outcomes.treatment_id = treatments.treatment_id GROUP BY - 1 + outcomes.treatment_id ), _s3 AS ( SELECT - treatments.diag_id, - MAX(_s1.max_day100_itch_vas) AS max_max_day100_itch_vas, - COUNT(DISTINCT treatments.patient_id) AS ndistinct_patient_id - FROM main.treatments AS treatments - JOIN _s1 AS _s1 - ON _s1.treatment_id = treatments.treatment_id + anything_diag_id, + MAX(max_day100_itch_vas) AS max_max_day100_itch_vas, + COUNT(DISTINCT anything_patient_id) AS ndistinct_anything_patient_id + FROM _t1 GROUP BY 1 ) SELECT diagnoses.diag_name AS diagnosis_name, - _s3.ndistinct_patient_id AS num_patients, + _s3.ndistinct_anything_patient_id AS num_patients, _s3.max_max_day100_itch_vas AS max_itch_score FROM main.diagnoses AS diagnoses JOIN _s3 AS _s3 - ON _s3.diag_id = diagnoses.diag_id + ON 
_s3.anything_diag_id = diagnoses.diag_id ORDER BY 3 DESC, 2 DESC diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic4_mysql.sql b/tests/test_sql_refsols/defog_dermtreatment_basic4_mysql.sql index 109955774..028a298d0 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic4_mysql.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic4_mysql.sql @@ -1,28 +1,29 @@ -WITH _s1 AS ( +WITH _t1 AS ( SELECT - treatment_id, - MAX(day100_itch_vas) AS max_day100_itch_vas - FROM main.outcomes + ANY_VALUE(treatments.diag_id) AS anything_diag_id, + ANY_VALUE(treatments.patient_id) AS anything_patient_id, + MAX(outcomes.day100_itch_vas) AS max_day100_itch_vas + FROM main.treatments AS treatments + JOIN main.outcomes AS outcomes + ON outcomes.treatment_id = treatments.treatment_id GROUP BY - 1 + outcomes.treatment_id ), _s3 AS ( SELECT - treatments.diag_id, - MAX(_s1.max_day100_itch_vas) AS max_max_day100_itch_vas, - COUNT(DISTINCT treatments.patient_id) AS ndistinct_patient_id - FROM main.treatments AS treatments - JOIN _s1 AS _s1 - ON _s1.treatment_id = treatments.treatment_id + anything_diag_id, + MAX(max_day100_itch_vas) AS max_max_day100_itch_vas, + COUNT(DISTINCT anything_patient_id) AS ndistinct_anything_patient_id + FROM _t1 GROUP BY 1 ) SELECT diagnoses.diag_name AS diagnosis_name, - _s3.ndistinct_patient_id AS num_patients, + _s3.ndistinct_anything_patient_id AS num_patients, _s3.max_max_day100_itch_vas AS max_itch_score FROM main.diagnoses AS diagnoses JOIN _s3 AS _s3 - ON _s3.diag_id = diagnoses.diag_id + ON _s3.anything_diag_id = diagnoses.diag_id ORDER BY 3 DESC, 2 DESC diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic4_postgres.sql b/tests/test_sql_refsols/defog_dermtreatment_basic4_postgres.sql index 3bcefdbaa..cc72c3961 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic4_postgres.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic4_postgres.sql @@ -1,28 +1,29 @@ -WITH _s1 AS ( +WITH _t1 AS ( SELECT - treatment_id, - 
MAX(day100_itch_vas) AS max_day100_itch_vas - FROM main.outcomes + MAX(treatments.diag_id) AS anything_diag_id, + MAX(treatments.patient_id) AS anything_patient_id, + MAX(outcomes.day100_itch_vas) AS max_day100_itch_vas + FROM main.treatments AS treatments + JOIN main.outcomes AS outcomes + ON outcomes.treatment_id = treatments.treatment_id GROUP BY - 1 + outcomes.treatment_id ), _s3 AS ( SELECT - treatments.diag_id, - MAX(_s1.max_day100_itch_vas) AS max_max_day100_itch_vas, - COUNT(DISTINCT treatments.patient_id) AS ndistinct_patient_id - FROM main.treatments AS treatments - JOIN _s1 AS _s1 - ON _s1.treatment_id = treatments.treatment_id + anything_diag_id, + MAX(max_day100_itch_vas) AS max_max_day100_itch_vas, + COUNT(DISTINCT anything_patient_id) AS ndistinct_anything_patient_id + FROM _t1 GROUP BY 1 ) SELECT diagnoses.diag_name AS diagnosis_name, - _s3.ndistinct_patient_id AS num_patients, + _s3.ndistinct_anything_patient_id AS num_patients, _s3.max_max_day100_itch_vas AS max_itch_score FROM main.diagnoses AS diagnoses JOIN _s3 AS _s3 - ON _s3.diag_id = diagnoses.diag_id + ON _s3.anything_diag_id = diagnoses.diag_id ORDER BY 3 DESC NULLS LAST, 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic4_snowflake.sql b/tests/test_sql_refsols/defog_dermtreatment_basic4_snowflake.sql index 3bcefdbaa..460b8dd84 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic4_snowflake.sql @@ -1,28 +1,29 @@ -WITH _s1 AS ( +WITH _t1 AS ( SELECT - treatment_id, - MAX(day100_itch_vas) AS max_day100_itch_vas - FROM main.outcomes + ANY_VALUE(treatments.diag_id) AS anything_diag_id, + ANY_VALUE(treatments.patient_id) AS anything_patient_id, + MAX(outcomes.day100_itch_vas) AS max_day100_itch_vas + FROM main.treatments AS treatments + JOIN main.outcomes AS outcomes + ON outcomes.treatment_id = treatments.treatment_id GROUP BY - 1 + outcomes.treatment_id ), _s3 AS ( SELECT - 
treatments.diag_id, - MAX(_s1.max_day100_itch_vas) AS max_max_day100_itch_vas, - COUNT(DISTINCT treatments.patient_id) AS ndistinct_patient_id - FROM main.treatments AS treatments - JOIN _s1 AS _s1 - ON _s1.treatment_id = treatments.treatment_id + anything_diag_id, + MAX(max_day100_itch_vas) AS max_max_day100_itch_vas, + COUNT(DISTINCT anything_patient_id) AS ndistinct_anything_patient_id + FROM _t1 GROUP BY 1 ) SELECT diagnoses.diag_name AS diagnosis_name, - _s3.ndistinct_patient_id AS num_patients, + _s3.ndistinct_anything_patient_id AS num_patients, _s3.max_max_day100_itch_vas AS max_itch_score FROM main.diagnoses AS diagnoses JOIN _s3 AS _s3 - ON _s3.diag_id = diagnoses.diag_id + ON _s3.anything_diag_id = diagnoses.diag_id ORDER BY 3 DESC NULLS LAST, 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic4_sqlite.sql b/tests/test_sql_refsols/defog_dermtreatment_basic4_sqlite.sql index 109955774..be3227e2e 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic4_sqlite.sql @@ -1,28 +1,29 @@ -WITH _s1 AS ( +WITH _t1 AS ( SELECT - treatment_id, - MAX(day100_itch_vas) AS max_day100_itch_vas - FROM main.outcomes + MAX(treatments.diag_id) AS anything_diag_id, + MAX(treatments.patient_id) AS anything_patient_id, + MAX(outcomes.day100_itch_vas) AS max_day100_itch_vas + FROM main.treatments AS treatments + JOIN main.outcomes AS outcomes + ON outcomes.treatment_id = treatments.treatment_id GROUP BY - 1 + outcomes.treatment_id ), _s3 AS ( SELECT - treatments.diag_id, - MAX(_s1.max_day100_itch_vas) AS max_max_day100_itch_vas, - COUNT(DISTINCT treatments.patient_id) AS ndistinct_patient_id - FROM main.treatments AS treatments - JOIN _s1 AS _s1 - ON _s1.treatment_id = treatments.treatment_id + anything_diag_id, + MAX(max_day100_itch_vas) AS max_max_day100_itch_vas, + COUNT(DISTINCT anything_patient_id) AS ndistinct_anything_patient_id + FROM _t1 GROUP BY 1 ) SELECT 
diagnoses.diag_name AS diagnosis_name, - _s3.ndistinct_patient_id AS num_patients, + _s3.ndistinct_anything_patient_id AS num_patients, _s3.max_max_day100_itch_vas AS max_itch_score FROM main.diagnoses AS diagnoses JOIN _s3 AS _s3 - ON _s3.diag_id = diagnoses.diag_id + ON _s3.anything_diag_id = diagnoses.diag_id ORDER BY 3 DESC, 2 DESC From a9ac04cc4202a3a9551ac344fb8a8f1d4d54998f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 15:51:38 -0400 Subject: [PATCH 102/143] Need to fix multi partition access --- .../conversion/join_aggregate_transpose.py | 38 ++++++++----- tests/test_plan_refsols/common_prefix_ad.txt | 16 +++--- tests/test_plan_refsols/common_prefix_al.txt | 12 ++--- tests/test_plan_refsols/common_prefix_n.txt | 10 ++-- tests/test_plan_refsols/correl_14.txt | 12 ++--- tests/test_plan_refsols/correl_15.txt | 12 ++--- tests/test_plan_refsols/correl_29.txt | 20 +++---- .../multi_partition_access_2.txt | 6 +-- .../multi_partition_access_4.txt | 6 +-- .../parts_quantity_increase_95_96.txt | 6 +-- tests/test_plan_refsols/tpch_q20.txt | 6 +-- tests/test_sql_refsols/correl_14_sqlite.sql | 8 +-- tests/test_sql_refsols/correl_15_sqlite.sql | 8 +-- tests/test_sql_refsols/correl_29_sqlite.sql | 54 +++++++++---------- .../defog_ewallet_adv10_ansi.sql | 4 +- .../defog_ewallet_adv10_mysql.sql | 4 +- .../defog_ewallet_adv10_postgres.sql | 4 +- .../defog_ewallet_adv10_snowflake.sql | 4 +- .../defog_ewallet_adv10_sqlite.sql | 4 +- .../defog_ewallet_adv11_ansi.sql | 4 +- .../defog_ewallet_adv11_mysql.sql | 4 +- .../defog_ewallet_adv11_postgres.sql | 4 +- .../defog_ewallet_adv11_snowflake.sql | 4 +- .../defog_ewallet_adv11_sqlite.sql | 4 +- .../defog_ewallet_adv8_ansi.sql | 4 +- .../defog_ewallet_adv8_mysql.sql | 4 +- .../defog_ewallet_adv8_postgres.sql | 4 +- .../defog_ewallet_adv8_snowflake.sql | 4 +- .../defog_ewallet_adv8_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q20_ansi.sql | 6 +-- tests/test_sql_refsols/tpch_q20_mysql.sql | 6 +-- 
tests/test_sql_refsols/tpch_q20_postgres.sql | 6 +-- tests/test_sql_refsols/tpch_q20_snowflake.sql | 6 +-- tests/test_sql_refsols/tpch_q20_sqlite.sql | 6 +-- 34 files changed, 159 insertions(+), 145 deletions(-) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index 07d0921fe..7dd4c7266 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -20,7 +20,11 @@ RelationalRoot, RelationalShuttle, ) -from pydough.relational.rel_util import add_input_name, apply_substitution +from pydough.relational.rel_util import ( + add_input_name, + apply_substitution, + extract_equijoin_keys, +) class JoinAggregateTransposeShuttle(RelationalShuttle): @@ -125,7 +129,7 @@ def join_aggregate_transpose( ) # Find all of the columns used in the join condition that come from the - # aggregate side of the join + # aggregate side of the join, and the other side as well. self.finder.reset() join.condition.accept(self.finder) agg_condition_columns: set[ColumnReference] = { @@ -179,6 +183,7 @@ def join_aggregate_transpose( agg_columns_remapped: dict[RelationalExpression, RelationalExpression] = {} join_sub: dict[RelationalExpression, RelationalExpression] = {} + agg_key_names: dict[str, str] = {} for col_name, col_expr in join.columns.items(): assert isinstance(col_expr, ColumnReference) @@ -192,6 +197,7 @@ def join_aggregate_transpose( new_aggregate_keys[agg_name] = ColumnReference( join_name, col_expr.data_type ) + agg_key_names[col_name] = agg_name agg_columns_remapped[aggregate.keys[col_expr.name]] = ( ColumnReference(join_name, col_expr.data_type) ) @@ -227,6 +233,7 @@ def join_aggregate_transpose( join_name = self.generate_name(agg_key_name, new_join_columns) agg_name = self.generate_name(agg_key_name, new_agg_names) new_join_columns[join_name] = add_input_name(agg_key_expr, agg_alias) + agg_key_names[agg_key_name] = agg_name new_aggregate_keys[agg_name] = 
ColumnReference( join_name, agg_key_expr.data_type ) @@ -249,18 +256,25 @@ def join_aggregate_transpose( new_join, new_aggregate_keys, new_aggregate_aggs ) - new_project: Project = Project(new_aggregate, project_columns) - - # print() - # print(join.to_tree_string()) - - # print() - # print(new_join.to_tree_string()) + # Create a mapping from the join keys on the non-aggregate side to those + # on the aggregate side, so that the non-aggregate keys are not used + # in the output. + agg_key_refs, non_agg_key_refs = extract_equijoin_keys(join) + if not is_left: + agg_key_refs, non_agg_key_refs = non_agg_key_refs, agg_key_refs - # print() - # print(new_project.to_tree_string()) + rev_join_map: dict[RelationalExpression, str] = { + expr: name for name, expr in join.columns.items() + } + for agg_key, non_agg_key in zip(agg_key_refs, non_agg_key_refs): + agg_key_name_lookup: str = agg_key_names[agg_key.name] + non_agg_key_name: str | None = rev_join_map.get(non_agg_key, None) + if agg_key_name_lookup is not None and non_agg_key_name is not None: + project_columns[non_agg_key_name] = ColumnReference( + agg_key_name_lookup, agg_key.data_type + ) - # breakpoint() + new_project: Project = Project(new_aggregate, project_columns) return new_project diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 46f5f7a1e..022d52ee5 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,11 +1,11 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('part_qty', anything_ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.anything_ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'anything_ps_availqty': t1.anything_ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': 
t1.sum_l_quantity}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_ps_availqty': ANYTHING(ps_availqty), 'anything_ps_suppkey': ANYTHING(ps_suppkey), 'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) +ROOT(columns=[('supplier_name', anything_s_name), ('part_name', anything_p_name), ('part_qty', anything_ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(anything_s_name):asc_first]) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_ps_availqty': ANYTHING(ps_availqty), 'anything_s_name': ANYTHING(s_name), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + 
SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 7f3114112..77788a35d 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('cust_key', anything_anything_c_custkey), ('n_orders', DEFAULT_TO(anything_anything_n_rows, 0:numeric)), ('n_no_tax_discount', anything_n_rows)], orderings=[(anything_anything_c_custkey):asc_first]) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_anything_c_custkey': ANYTHING(anything_c_custkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_n_rows': ANYTHING(n_rows)}) - JOIN(condition=t0.anything_c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'anything_c_custkey': t0.anything_c_custkey, 'anything_n_rows': t0.anything_n_rows, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) - LIMIT(limit=10:numeric, columns={'anything_c_custkey': anything_c_custkey, 'anything_n_rows': anything_n_rows, 'n_rows': n_rows}, orderings=[(anything_c_custkey):asc_first]) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_custkey': ANYTHING(c_custkey), 'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) +ROOT(columns=[('cust_key', o_custkey), ('n_orders', DEFAULT_TO(anything_anything_n_rows, 0:numeric)), ('n_no_tax_discount', anything_n_rows)], orderings=[(o_custkey):asc_first]) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_n_rows': ANYTHING(n_rows)}) + JOIN(condition=t0.o_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_rows': t0.anything_n_rows, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) + LIMIT(limit=10:numeric, columns={'anything_n_rows': anything_n_rows, 'n_rows': n_rows, 'o_custkey': o_custkey}, orderings=[(o_custkey):asc_first]) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 3fbeb60a7..0ac7dc610 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('key', anything_o_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (anything_o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'anything_o_orderkey': anything_o_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'sum_n_rows': sum_n_rows, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.anything_o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'anything_o_orderkey': t0.anything_o_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'anything_o_orderkey': ANYTHING(o_orderkey), 'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'p_retailprice': t1.p_retailprice, 's_acctbal': t1.s_acctbal}) +ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'sum_n_rows': sum_n_rows, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'p_retailprice': t1.p_retailprice, 's_acctbal': t1.s_acctbal}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 
12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t1.n_rows, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 32a6e4466..9fe75904a 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('n', ndistinct_anything_s_suppkey)], orderings=[]) - AGGREGATE(keys={}, aggregations={'ndistinct_anything_s_suppkey': NDISTINCT(anything_s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.avg_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey}) - JOIN(condition=t0.anything_s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey, 'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_s_suppkey': ANYTHING(s_suppkey), 'avg_p_retailprice': AVG(p_retailprice)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey, 's_suppkey': t0.s_suppkey}) +ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.avg_p_retailprice & 
t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 82b371c39..2f8b7cb03 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('n', ndistinct_anything_s_suppkey)], orderings=[]) - AGGREGATE(keys={}, aggregations={'ndistinct_anything_s_suppkey': NDISTINCT(anything_s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey}) - JOIN(condition=t0.anything_s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'anything_s_suppkey': t0.anything_s_suppkey, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 'supplier_avg_price': t0.supplier_avg_price}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'anything_s_suppkey': ANYTHING(s_suppkey), 'supplier_avg_price': AVG(p_retailprice)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey, 's_suppkey': t0.s_suppkey}) +ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) + JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'supplier_avg_price': t0.supplier_avg_price}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'supplier_avg_price': AVG(p_retailprice)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 2cb18dfd3..36c3bd12a 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,8 +1,8 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_name', anything_anything_anything_n_name), ('n_above_avg_customers', anything_anything_n_rows), ('n_above_avg_suppliers', agg_3_14), ('min_cust_acctbal', anything_min_c_acctbal), ('max_cust_acctbal', anything_max_c_acctbal)], orderings=[(anything_anything_anything_n_regionkey):asc_first, (anything_anything_anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'anything_anything_n_regionkey': t0.anything_anything_n_regionkey, 'anything_n_rows': t0.anything_n_rows, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_nationkey': t1.n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': 
MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t0.c_acctbal, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': 
ANYTHING(n_regionkey), 'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -12,9 +12,9 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_n SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, 
columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index adc747c7b..11e969ab7 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -8,9 +8,9 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - JOIN(condition=t0.anything_customer_id_9 == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t0.anything_sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId_0': sbTxTickerId_0}, aggregations={'anything_customer_id_9': ANYTHING(customer_id_9), 'anything_sbTxTickerId': ANYTHING(sbTxTickerId), 'anything_sbTxType': ANYTHING(sbTxType)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxTickerId_0': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.anything_sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': 
t1.sum_sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'anything_sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index ac8d02fb6..c284ce3c8 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.anything_sbTxCustId == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.anything_max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) - AGGREGATE(keys={'sbTxCustId_0': sbTxCustId_0}, aggregations={'anything_max_sbTxShares': ANYTHING(max_sbTxShares), 'anything_sbTxCustId': ANYTHING(sbTxCustId), 'anything_sbTxTickerId': ANYTHING(sbTxTickerId), 'max_max_sbTxShares': MAX(sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxCustId_0': t0.sbTxCustId, 'sbTxShares': t0.sbTxShares, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & 
t0.sbTxCustId == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.anything_max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'anything_max_sbTxShares': ANYTHING(max_sbTxShares), 'anything_sbTxTickerId': ANYTHING(sbTxTickerId), 'max_max_sbTxShares': MAX(sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t0.sbTxShares, 'sbTxTickerId': t1.sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 488f4fd87..c6e5c9a16 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', anything_anything_p_name), ('qty_95', DEFAULT_TO(anything_sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(anything_sum_l_quantity, 0:numeric)):desc_last, (anything_anything_p_name):asc_first], limit=3:numeric) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'agg_1': SUM(l_quantity), 'anything_anything_p_name': ANYTHING(anything_p_name), 'anything_sum_l_quantity': ANYTHING(sum_l_quantity)}) - JOIN(condition=t0.anything_p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'anything_p_name': t0.anything_p_name, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'sum_l_quantity': t0.sum_l_quantity}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_p_partkey': ANYTHING(p_partkey), 'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.l_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t0.anything_p_name, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'sum_l_quantity': t0.sum_l_quantity}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_name': t0.p_name}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 5d72b9bb2..7100b288a 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -5,10 +5,10 @@ ROOT(columns=[('S_NAME', anything_s_name), ('S_ADDRESS', anything_s_address)], o SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) 
FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.anything_p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_partkey': ANYTHING(p_partkey), 'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_partkey': t0.p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) diff --git a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index cea605abf..011898756 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,6 +1,6 @@ WITH _s4 AS ( SELECT - MAX(supplier.s_suppkey) AS anything_s_suppkey, + 
partsupp.ps_suppkey, AVG(part.p_retailprice) AS avg_p_retailprice FROM tpch.supplier AS supplier JOIN tpch.partsupp AS partsupp @@ -10,13 +10,13 @@ WITH _s4 AS ( WHERE supplier.s_acctbal < 1000 AND supplier.s_nationkey = 19 GROUP BY - partsupp.ps_suppkey + 1 ) SELECT - COUNT(DISTINCT _s4.anything_s_suppkey) AS n + COUNT(DISTINCT _s4.ps_suppkey) AS n FROM _s4 AS _s4 JOIN tpch.partsupp AS partsupp - ON _s4.anything_s_suppkey = partsupp.ps_suppkey + ON _s4.ps_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part ON _s4.avg_p_retailprice > part.p_retailprice AND part.p_container = 'LG DRUM' diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index 10c89c472..f65eaa980 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -4,8 +4,8 @@ WITH _s0 AS ( FROM tpch.part ), _s6 AS ( SELECT + partsupp.ps_suppkey, MAX(_s0.avg_p_retailprice) AS anything_avg_p_retailprice, - MAX(supplier.s_suppkey) AS anything_s_suppkey, AVG(part.p_retailprice) AS supplier_avg_price FROM _s0 AS _s0 JOIN tpch.supplier AS supplier @@ -15,13 +15,13 @@ WITH _s0 AS ( JOIN tpch.part AS part ON part.p_partkey = partsupp.ps_partkey GROUP BY - partsupp.ps_suppkey + 1 ) SELECT - COUNT(DISTINCT _s6.anything_s_suppkey) AS n + COUNT(DISTINCT _s6.ps_suppkey) AS n FROM _s6 AS _s6 JOIN tpch.partsupp AS partsupp - ON _s6.anything_s_suppkey = partsupp.ps_suppkey + ON _s6.ps_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part ON _s6.supplier_avg_price > part.p_retailprice AND part.p_container = 'LG DRUM' diff --git a/tests/test_sql_refsols/correl_29_sqlite.sql b/tests/test_sql_refsols/correl_29_sqlite.sql index 8e3599f4b..ab54a3144 100644 --- a/tests/test_sql_refsols/correl_29_sqlite.sql +++ b/tests/test_sql_refsols/correl_29_sqlite.sql @@ -23,50 +23,50 @@ WITH _t5 AS ( ON _s1.avg_c_acctbal < _s3.c_acctbal AND _s3.c_nationkey = nation.n_nationkey GROUP BY 1 -), _s10 AS ( +), _t6 AS ( + SELECT + s_acctbal, + 
s_nationkey + FROM tpch.supplier +), _s7 AS ( + SELECT + s_nationkey, + AVG(s_acctbal) AS avg_s_acctbal + FROM _t6 + GROUP BY + 1 +), _t1 AS ( SELECT + _s5.c_nationkey, MAX(_t3.anything_n_name) AS anything_anything_n_name, MAX(_t3.anything_n_regionkey) AS anything_anything_n_regionkey, - MAX(_t3.n_nationkey) AS anything_n_nationkey, MAX(_t3.n_rows) AS anything_n_rows, MAX(_s5.c_acctbal) AS max_c_acctbal, MIN(_s5.c_acctbal) AS min_c_acctbal FROM _t3 AS _t3 JOIN _t5 AS _s5 ON _s5.c_nationkey = _t3.n_nationkey + JOIN tpch.nation AS nation + ON _s5.c_nationkey = nation.n_nationkey + JOIN _s7 AS _s7 + ON _s7.s_nationkey = nation.n_nationkey + JOIN _t6 AS _s9 + ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey WHERE _t3.anything_n_regionkey IN (1, 3) - GROUP BY - _s5.c_nationkey -), _t6 AS ( - SELECT - s_acctbal, - s_nationkey - FROM tpch.supplier -), _s7 AS ( - SELECT - s_nationkey, - AVG(s_acctbal) AS avg_s_acctbal - FROM _t6 GROUP BY 1 ) SELECT - MAX(_s10.anything_anything_n_regionkey) AS region_key, - MAX(_s10.anything_anything_n_name) AS nation_name, - MAX(_s10.anything_n_rows) AS n_above_avg_customers, + MAX(anything_anything_n_regionkey) AS region_key, + MAX(anything_anything_n_name) AS nation_name, + MAX(anything_n_rows) AS n_above_avg_customers, COUNT(*) AS n_above_avg_suppliers, - MAX(_s10.min_c_acctbal) AS min_cust_acctbal, - MAX(_s10.max_c_acctbal) AS max_cust_acctbal -FROM _s10 AS _s10 -JOIN tpch.nation AS nation - ON _s10.anything_n_nationkey = nation.n_nationkey -JOIN _s7 AS _s7 - ON _s7.s_nationkey = nation.n_nationkey -JOIN _t6 AS _s9 - ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey + MAX(min_c_acctbal) AS min_cust_acctbal, + MAX(max_c_acctbal) AS max_cust_acctbal +FROM _t1 GROUP BY - nation.n_nationkey + c_nationkey ORDER BY 1, 2 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql index 5e419b3c9..f3eae3bdb 100644 --- 
a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql @@ -1,9 +1,9 @@ SELECT - ANY_VALUE(users.uid) AS user_id, + wallet_transactions_daily.sender_id AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - wallet_transactions_daily.sender_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql index 5e419b3c9..f3eae3bdb 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql @@ -1,9 +1,9 @@ SELECT - ANY_VALUE(users.uid) AS user_id, + wallet_transactions_daily.sender_id AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - wallet_transactions_daily.sender_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql index b12f30f70..f3eae3bdb 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql @@ -1,9 +1,9 @@ SELECT - MAX(users.uid) AS user_id, + wallet_transactions_daily.sender_id AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - wallet_transactions_daily.sender_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql index 5e419b3c9..f3eae3bdb 100644 --- 
a/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql @@ -1,9 +1,9 @@ SELECT - ANY_VALUE(users.uid) AS user_id, + wallet_transactions_daily.sender_id AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - wallet_transactions_daily.sender_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql index b12f30f70..f3eae3bdb 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql @@ -1,9 +1,9 @@ SELECT - MAX(users.uid) AS user_id, + wallet_transactions_daily.sender_id AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - wallet_transactions_daily.sender_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql index 54888423e..185e648da 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(users.uid) AS uid, + user_sessions.user_id AS uid, SUM( DATEDIFF( CAST(user_sessions.session_end_ts AS DATETIME), @@ -13,6 +13,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - user_sessions.user_id + 1 ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql index beecc5327..41b3428dd 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql +++ 
b/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(users.uid) AS uid, + user_sessions.user_id AS uid, SUM( TIMESTAMPDIFF(SECOND, user_sessions.session_start_ts, user_sessions.session_end_ts) ) AS total_duration @@ -9,6 +9,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - user_sessions.user_id + 1 ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql index 03f38d12f..e123f50d6 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql @@ -1,5 +1,5 @@ SELECT - MAX(users.uid) AS uid, + user_sessions.user_id AS uid, SUM( EXTRACT(EPOCH FROM ( CAST(user_sessions.session_end_ts AS TIMESTAMP) - CAST(user_sessions.session_start_ts AS TIMESTAMP) @@ -11,6 +11,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - user_sessions.user_id + 1 ORDER BY 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql index ad121c7f4..7d8defbb6 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(users.uid) AS uid, + user_sessions.user_id AS uid, SUM( DATEDIFF( SECOND, @@ -13,6 +13,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - user_sessions.user_id + 1 ORDER BY 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql index f26571a07..508a4685b 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql +++ 
b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql @@ -1,5 +1,5 @@ SELECT - MAX(users.uid) AS uid, + user_sessions.user_id AS uid, SUM( ( ( @@ -15,6 +15,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - user_sessions.user_id + 1 ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql index de3b98e6b..1d96449b8 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(merchants.mid) AS merchants_id, + wallet_transactions_daily.receiver_id AS merchants_id, ANY_VALUE(merchants.name) AS merchants_name, ANY_VALUE(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - wallet_transactions_daily.receiver_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql index b341dbba8..8519016f7 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(merchants.mid) AS merchants_id, + wallet_transactions_daily.receiver_id AS merchants_id, ANY_VALUE(merchants.name) AS merchants_name, ANY_VALUE(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - wallet_transactions_daily.receiver_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql 
b/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql index 4b1d90d67..33e2f6661 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql @@ -1,5 +1,5 @@ SELECT - MAX(merchants.mid) AS merchants_id, + wallet_transactions_daily.receiver_id AS merchants_id, MAX(merchants.name) AS merchants_name, MAX(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - wallet_transactions_daily.receiver_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql index b341dbba8..8519016f7 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(merchants.mid) AS merchants_id, + wallet_transactions_daily.receiver_id AS merchants_id, ANY_VALUE(merchants.name) AS merchants_name, ANY_VALUE(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - wallet_transactions_daily.receiver_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql index 4b1d90d67..33e2f6661 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql @@ -1,5 +1,5 @@ SELECT - MAX(merchants.mid) AS merchants_id, + wallet_transactions_daily.receiver_id AS merchants_id, MAX(merchants.name) AS merchants_name, MAX(merchants.category) AS category, 
COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - wallet_transactions_daily.receiver_id + 1 diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 1b5d6ec54..090832727 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - ANY_VALUE(part.p_partkey) AS anything_p_partkey, + lineitem.l_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE part.p_name LIKE 'forest%' GROUP BY - lineitem.l_partkey + 1 ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.anything_p_partkey = partsupp.ps_partkey + ON _s5.l_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_mysql.sql b/tests/test_sql_refsols/tpch_q20_mysql.sql index 284ef586d..fb35009f6 100644 --- a/tests/test_sql_refsols/tpch_q20_mysql.sql +++ b/tests/test_sql_refsols/tpch_q20_mysql.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - ANY_VALUE(PART.p_partkey) AS anything_p_partkey, + LINEITEM.l_partkey, SUM(LINEITEM.l_quantity) AS sum_l_quantity FROM tpch.PART AS PART JOIN tpch.LINEITEM AS LINEITEM @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE PART.p_name LIKE 'forest%' GROUP BY - LINEITEM.l_partkey + 1 ) SELECT ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, @@ -23,7 +23,7 @@ JOIN _s5 AS _s5 ON PARTSUPP.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) - AND PARTSUPP.ps_partkey = _s5.anything_p_partkey + AND PARTSUPP.ps_partkey = _s5.l_partkey GROUP BY PARTSUPP.ps_suppkey 
ORDER BY diff --git a/tests/test_sql_refsols/tpch_q20_postgres.sql b/tests/test_sql_refsols/tpch_q20_postgres.sql index 3f261a9bf..8da59668e 100644 --- a/tests/test_sql_refsols/tpch_q20_postgres.sql +++ b/tests/test_sql_refsols/tpch_q20_postgres.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - MAX(part.p_partkey) AS anything_p_partkey, + lineitem.l_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE part.p_name LIKE 'forest%' GROUP BY - lineitem.l_partkey + 1 ) SELECT MAX(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.anything_p_partkey = partsupp.ps_partkey + ON _s5.l_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_snowflake.sql b/tests/test_sql_refsols/tpch_q20_snowflake.sql index cc2d0c469..ec5447f59 100644 --- a/tests/test_sql_refsols/tpch_q20_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q20_snowflake.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - ANY_VALUE(part.p_partkey) AS anything_p_partkey, + lineitem.l_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE STARTSWITH(part.p_name, 'forest') GROUP BY - lineitem.l_partkey + 1 ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.anything_p_partkey = partsupp.ps_partkey + ON _s5.l_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index 46f3cd2d7..065135776 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ 
b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - MAX(part.p_partkey) AS anything_p_partkey, + lineitem.l_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE part.p_name LIKE 'forest%' GROUP BY - lineitem.l_partkey + 1 ) SELECT MAX(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.anything_p_partkey = partsupp.ps_partkey + ON _s5.l_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) From a5c616064dba88e2fbdd20bc764b155cbf0a62cf Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 20:13:45 -0400 Subject: [PATCH 103/143] WIP revision phase --- .../conversion/join_aggregate_transpose.py | 242 +++++++++++------- pydough/conversion/relational_converter.py | 2 +- .../avg_order_diff_per_customer.txt | 12 +- tests/test_plan_refsols/common_prefix_ad.txt | 16 +- tests/test_plan_refsols/common_prefix_al.txt | 12 +- tests/test_plan_refsols/common_prefix_n.txt | 10 +- tests/test_plan_refsols/correl_14.txt | 12 +- tests/test_plan_refsols/correl_15.txt | 12 +- tests/test_plan_refsols/correl_29.txt | 20 +- tests/test_plan_refsols/correl_30.txt | 8 +- .../customer_largest_order_deltas.txt | 8 +- .../month_year_sliding_windows.txt | 2 +- .../multi_partition_access_2.txt | 30 +-- .../multi_partition_access_4.txt | 10 +- .../parts_quantity_increase_95_96.txt | 6 +- tests/test_plan_refsols/tpch_q20.txt | 6 +- tests/test_sql_refsols/correl_14_sqlite.sql | 8 +- tests/test_sql_refsols/correl_15_sqlite.sql | 8 +- tests/test_sql_refsols/correl_29_sqlite.sql | 54 ++-- tests/test_sql_refsols/correl_30_sqlite.sql | 9 +- .../defog_ewallet_adv10_ansi.sql | 4 +- .../defog_ewallet_adv10_mysql.sql | 4 +- .../defog_ewallet_adv10_postgres.sql | 4 +- .../defog_ewallet_adv10_snowflake.sql | 4 +- 
.../defog_ewallet_adv10_sqlite.sql | 4 +- .../defog_ewallet_adv11_ansi.sql | 4 +- .../defog_ewallet_adv11_mysql.sql | 4 +- .../defog_ewallet_adv11_postgres.sql | 4 +- .../defog_ewallet_adv11_snowflake.sql | 4 +- .../defog_ewallet_adv11_sqlite.sql | 4 +- .../defog_ewallet_adv8_ansi.sql | 4 +- .../defog_ewallet_adv8_mysql.sql | 4 +- .../defog_ewallet_adv8_postgres.sql | 4 +- .../defog_ewallet_adv8_snowflake.sql | 4 +- .../defog_ewallet_adv8_sqlite.sql | 4 +- .../nation_acctbal_breakdown_mysql.sql | 51 ++-- .../nation_acctbal_breakdown_sqlite.sql | 51 ++-- tests/test_sql_refsols/tpch_q20_ansi.sql | 6 +- tests/test_sql_refsols/tpch_q20_mysql.sql | 6 +- tests/test_sql_refsols/tpch_q20_postgres.sql | 6 +- tests/test_sql_refsols/tpch_q20_snowflake.sql | 6 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 6 +- 42 files changed, 366 insertions(+), 313 deletions(-) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index 7dd4c7266..f42f79446 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -23,7 +23,6 @@ from pydough.relational.rel_util import ( add_input_name, apply_substitution, - extract_equijoin_keys, ) @@ -47,7 +46,7 @@ def visit_join(self, node: Join) -> RelationalNode: if isinstance(node.inputs[0], Aggregate): result = self.join_aggregate_transpose(node, node.inputs[0], True) if result is not None: - return self.generic_visit_inputs(result) + return result.accept_shuttle(self) # If the attempt failed, then attempt the transpose where the right # input is an Aggregate. 
If this attempt succeeded, use that as the @@ -55,7 +54,7 @@ def visit_join(self, node: Join) -> RelationalNode: if isinstance(node.inputs[1], Aggregate): result = self.join_aggregate_transpose(node, node.inputs[1], False) if result is not None: - return self.generic_visit_inputs(result) + return result.accept_shuttle(self) # If this attempt failed, fall back to the regular implementation. return super().visit_join(node) @@ -152,14 +151,29 @@ def join_aggregate_transpose( join.default_input_aliases[1] if is_left else join.default_input_aliases[0] ) + # Identify the new cardinality of the join if the aggregate is no longer + # happening before the join. + new_cardinality: JoinCardinality = join.cardinality + new_reverse_cardinality: JoinCardinality = join.reverse_cardinality + if is_left: + new_reverse_cardinality = new_reverse_cardinality.add_plural() + else: + new_cardinality = new_cardinality.add_plural() + # A mapping that will be used to map every expression with regards to # the original join looking at its input expressions to what the # expression will be in the output columns of the new aggregate new_join_columns: dict[str, RelationalExpression] = {} - new_aggregate_keys: dict[str, RelationalExpression] = {} - new_aggregate_aggs: dict[str, CallExpression] = {} - new_agg_names: set[str] = set() + new_aggregate_keys: dict[str, RelationalExpression] = dict(aggregate.keys) + new_aggregate_aggs: dict[str, CallExpression] = dict(aggregate.aggregations) + new_agg_names: set[str] = set(aggregate.columns) + join_sub: dict[RelationalExpression, RelationalExpression] = {} + join_cond_sub: dict[RelationalExpression, RelationalExpression] = {} + for key_name, key_expr in aggregate.keys.items(): + join_cond_sub[ColumnReference(key_name, key_expr.data_type, agg_alias)] = ( + add_input_name(key_expr, agg_alias) + ) agg_input: RelationalNode = aggregate.inputs[0] non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0] @@ -167,84 +181,118 @@ def 
join_aggregate_transpose( [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] ) - project_columns: dict[str, RelationalExpression] = {} - - # Identify the new cardinality of the join if the aggregate is no longer - # happening before the join. - new_cardinality: JoinCardinality = join.cardinality - new_reverse_cardinality: JoinCardinality = join.reverse_cardinality - if is_left: - new_reverse_cardinality = new_reverse_cardinality.add_plural() - else: - new_cardinality = new_cardinality.add_plural() + new_project_columns: dict[str, RelationalExpression] = {} + # Start by placing all of the columns from the aggregate node's input + # into the join's columns so that the aggregate keys/aggregations can + # refer to them with the same names, without any renaming caused by + # conflicts. join_name: str agg_name: str + for col_name, col_expr in agg_input.columns.items(): + join_name = self.generate_name(col_name, new_join_columns) + new_join_columns[join_name] = add_input_name(col_expr, agg_alias) + + # Add substitution remappings for the aggregate's output columns so that + # they are correctly renamed as regular references in the final + # projection, which will use terms from the original join's output but + # with this substitution applied to them. 
+ for col_name, col_expr in aggregate.columns.items(): + join_sub[ColumnReference(col_name, col_expr.data_type, agg_alias)] = ( + ColumnReference(col_name, col_expr.data_type) + ) - agg_columns_remapped: dict[RelationalExpression, RelationalExpression] = {} - join_sub: dict[RelationalExpression, RelationalExpression] = {} - agg_key_names: dict[str, str] = {} - - for col_name, col_expr in join.columns.items(): - assert isinstance(col_expr, ColumnReference) + for col_name, col_expr in non_agg_input.columns.items(): join_name = self.generate_name(col_name, new_join_columns) + new_join_columns[join_name] = add_input_name(col_expr, non_agg_alias) agg_name = self.generate_name(col_name, new_agg_names) - if col_expr.input_name == agg_alias: - if col_expr.name in aggregate.keys: - new_join_columns[join_name] = add_input_name( - aggregate.keys[col_expr.name], agg_alias - ) - new_aggregate_keys[agg_name] = ColumnReference( - join_name, col_expr.data_type - ) - agg_key_names[col_name] = agg_name - agg_columns_remapped[aggregate.keys[col_expr.name]] = ( - ColumnReference(join_name, col_expr.data_type) - ) - else: - sub_agg_name: str - current_agg: CallExpression = aggregate.aggregations[col_expr.name] - for arg in current_agg.inputs: - sub_agg_name = self.generate_name("expr", new_join_columns) - new_join_columns[sub_agg_name] = add_input_name(arg, agg_alias) - agg_columns_remapped[arg] = ColumnReference( - sub_agg_name, arg.data_type - ) - new_call = apply_substitution( - aggregate.aggregations[col_expr.name], agg_columns_remapped, {} - ) - assert isinstance(new_call, CallExpression) - new_aggregate_aggs[agg_name] = new_call - new_agg_names.add(agg_name) - else: - new_join_columns[join_name] = ColumnReference( - col_expr.name, col_expr.data_type, non_agg_alias - ) - new_aggregate_aggs[agg_name] = CallExpression( - pydop.ANYTHING, - col_expr.data_type, - [ColumnReference(join_name, col_expr.data_type)], - ) - new_agg_names.add(agg_name) - project_columns[col_name] = 
ColumnReference(agg_name, col_expr.data_type) - - for agg_key_name, agg_key_expr in aggregate.keys.items(): - if agg_key_name not in new_aggregate_keys: - join_name = self.generate_name(agg_key_name, new_join_columns) - agg_name = self.generate_name(agg_key_name, new_agg_names) - new_join_columns[join_name] = add_input_name(agg_key_expr, agg_alias) - agg_key_names[agg_key_name] = agg_name - new_aggregate_keys[agg_name] = ColumnReference( - join_name, agg_key_expr.data_type - ) - new_agg_names.add(agg_name) - join_sub[ - ColumnReference(agg_key_name, agg_key_expr.data_type, agg_alias) - ] = new_join_columns[join_name] + new_aggregate_aggs[agg_name] = CallExpression( + pydop.ANYTHING, + col_expr.data_type, + [ColumnReference(join_name, col_expr.data_type)], + ) + new_agg_names.add(agg_name) + join_sub[ColumnReference(col_name, col_expr.data_type, non_agg_alias)] = ( + ColumnReference(agg_name, col_expr.data_type) + ) + + # for key_name, key_expr in aggregate.keys.items(): + # agg_name: str = self.generate_name(key_name, new_agg_names) + # new_aggregate_keys[agg_name] = ColumnReference( + # self.generate_name(key_name, new_join_columns), + # key_expr.data_type, + # ) + # new_agg_names.add(agg_name) + + # join_name: str + # agg_name: str + + # agg_columns_remapped: dict[RelationalExpression, RelationalExpression] = {} + # agg_key_names: dict[str, str] = {} + + # for col_name, col_expr in join.columns.items(): + # assert isinstance(col_expr, ColumnReference) + # join_name = self.generate_name(col_name, new_join_columns) + # agg_name = self.generate_name(col_name, new_agg_names) + # if col_expr.input_name == agg_alias: + # if col_expr.name in aggregate.keys: + # new_join_columns[join_name] = add_input_name( + # aggregate.keys[col_expr.name], agg_alias + # ) + # new_aggregate_keys[agg_name] = ColumnReference( + # join_name, col_expr.data_type + # ) + # agg_key_names[col_name] = agg_name + # agg_columns_remapped[aggregate.keys[col_expr.name]] = ( + # 
ColumnReference(join_name, col_expr.data_type) + # ) + # else: + # sub_agg_name: str + # current_agg: CallExpression = aggregate.aggregations[col_expr.name] + # for arg in current_agg.inputs: + # sub_agg_name = self.generate_name("expr", new_join_columns) + # new_join_columns[sub_agg_name] = add_input_name(arg, agg_alias) + # agg_columns_remapped[arg] = ColumnReference( + # sub_agg_name, arg.data_type + # ) + # new_call = apply_substitution( + # aggregate.aggregations[col_expr.name], agg_columns_remapped, {} + # ) + # assert isinstance(new_call, CallExpression) + # new_aggregate_aggs[agg_name] = new_call + # new_agg_names.add(agg_name) + # else: + # new_join_columns[join_name] = ColumnReference( + # col_expr.name, col_expr.data_type, non_agg_alias + # ) + # new_aggregate_aggs[agg_name] = CallExpression( + # pydop.ANYTHING, + # col_expr.data_type, + # [ColumnReference(join_name, col_expr.data_type)], + # ) + # new_agg_names.add(agg_name) + # project_columns[col_name] = ColumnReference(agg_name, col_expr.data_type) + + # for agg_key_name, agg_key_expr in aggregate.keys.items(): + # if agg_key_name not in new_aggregate_keys: + # join_name = self.generate_name(agg_key_name, new_join_columns) + # agg_name = self.generate_name(agg_key_name, new_agg_names) + # new_join_columns[join_name] = add_input_name(agg_key_expr, agg_alias) + # agg_key_names[agg_key_name] = agg_name + # new_aggregate_keys[agg_name] = ColumnReference( + # join_name, agg_key_expr.data_type + # ) + # new_agg_names.add(agg_name) + # join_sub[ + # ColumnReference(agg_key_name, agg_key_expr.data_type, agg_alias) + # ] = new_join_columns[join_name] + + for col_name, col_expr in join.columns.items(): + new_project_columns[col_name] = apply_substitution(col_expr, join_sub, {}) new_join: Join = Join( new_join_inputs, - apply_substitution(join.condition, join_sub, {}), + apply_substitution(join.condition, join_cond_sub, {}), join.join_type, new_join_columns, new_cardinality, @@ -256,25 +304,31 @@ def 
join_aggregate_transpose( new_join, new_aggregate_keys, new_aggregate_aggs ) - # Create a mapping from the join keys on the non-aggregate side to those - # on the aggregate side, so that the non-aggregate keys are not used - # in the output. - agg_key_refs, non_agg_key_refs = extract_equijoin_keys(join) - if not is_left: - agg_key_refs, non_agg_key_refs = non_agg_key_refs, agg_key_refs - - rev_join_map: dict[RelationalExpression, str] = { - expr: name for name, expr in join.columns.items() - } - for agg_key, non_agg_key in zip(agg_key_refs, non_agg_key_refs): - agg_key_name_lookup: str = agg_key_names[agg_key.name] - non_agg_key_name: str | None = rev_join_map.get(non_agg_key, None) - if agg_key_name_lookup is not None and non_agg_key_name is not None: - project_columns[non_agg_key_name] = ColumnReference( - agg_key_name_lookup, agg_key.data_type - ) - - new_project: Project = Project(new_aggregate, project_columns) + # # Create a mapping from the join keys on the non-aggregate side to those + # # on the aggregate side, so that the non-aggregate keys are not used + # # in the output. 
+ # agg_key_refs, non_agg_key_refs = extract_equijoin_keys(join) + # if not is_left: + # agg_key_refs, non_agg_key_refs = non_agg_key_refs, agg_key_refs + + # rev_join_map: dict[RelationalExpression, str] = { + # expr: name for name, expr in join.columns.items() + # } + # for agg_key, non_agg_key in zip(agg_key_refs, non_agg_key_refs): + # agg_key_name_lookup: str = agg_key_names[agg_key.name] + # non_agg_key_name: str | None = rev_join_map.get(non_agg_key, None) + # if agg_key_name_lookup is not None and non_agg_key_name is not None: + # new_project_columns[non_agg_key_name] = ColumnReference( + # agg_key_name_lookup, agg_key.data_type + # ) + + new_project: Project = Project(new_aggregate, new_project_columns) + + print() + print(join.to_tree_string()) + + print() + print(new_project.to_tree_string()) return new_project diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 1c327a03b..3db00b942 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1586,7 +1586,7 @@ def optimize_relational_tree( # Re-run column bubbling now that the columns have been pruned again. 
root = bubble_column_names(root) - # Run the following pipeline twice: + # Run the following pipeline three times: # A: projection pullup # B: expression simplification # C: filter pushdown diff --git a/tests/test_plan_refsols/avg_order_diff_per_customer.txt b/tests/test_plan_refsols/avg_order_diff_per_customer.txt index a3965cd56..52e73ac8d 100644 --- a/tests/test_plan_refsols/avg_order_diff_per_customer.txt +++ b/tests/test_plan_refsols/avg_order_diff_per_customer.txt @@ -1,10 +1,10 @@ ROOT(columns=[('name', anything_c_name), ('avg_diff', avg_day_diff)], orderings=[(avg_day_diff):desc_last], limit=5:numeric) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'avg_day_diff': AVG(day_diff)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'day_diff': t1.day_diff, 'o_custkey': t1.o_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'day_diff': DATEDIFF('days':string, PREV(args=[o_orderdate], partition=[o_custkey], order=[(o_orderdate):asc_last]), o_orderdate), 'o_custkey': o_custkey}) + PROJECT(columns={'c_name': c_name, 'day_diff': DATEDIFF('days':string, PREV(args=[o_orderdate], partition=[o_custkey], order=[(o_orderdate):asc_last]), o_orderdate), 'o_custkey': o_custkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate}) + 
JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 022d52ee5..46f5f7a1e 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,11 +1,11 @@ -ROOT(columns=[('supplier_name', anything_s_name), ('part_name', anything_p_name), ('part_qty', anything_ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(anything_s_name):asc_first]) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_ps_availqty': ANYTHING(ps_availqty), 'anything_s_name': ANYTHING(s_name), 'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - 
FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty}) +ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('part_qty', anything_ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) + JOIN(condition=t0.s_suppkey == t1.anything_ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'anything_ps_availqty': t1.anything_ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_ps_availqty': ANYTHING(ps_availqty), 'anything_ps_suppkey': ANYTHING(ps_suppkey), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) 
FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 77788a35d..7f3114112 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('cust_key', o_custkey), ('n_orders', DEFAULT_TO(anything_anything_n_rows, 0:numeric)), ('n_no_tax_discount', anything_n_rows)], orderings=[(o_custkey):asc_first]) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_n_rows': ANYTHING(n_rows)}) - JOIN(condition=t0.o_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_rows': t0.anything_n_rows, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) - LIMIT(limit=10:numeric, columns={'anything_n_rows': anything_n_rows, 'n_rows': n_rows, 'o_custkey': o_custkey}, orderings=[(o_custkey):asc_first]) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) +ROOT(columns=[('cust_key', anything_anything_c_custkey), ('n_orders', DEFAULT_TO(anything_anything_n_rows, 0:numeric)), 
('n_no_tax_discount', anything_n_rows)], orderings=[(anything_anything_c_custkey):asc_first]) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_anything_c_custkey': ANYTHING(anything_c_custkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_n_rows': ANYTHING(n_rows)}) + JOIN(condition=t0.anything_c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_c_custkey': t0.anything_c_custkey, 'anything_n_rows': t0.anything_n_rows, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) + LIMIT(limit=10:numeric, columns={'anything_c_custkey': anything_c_custkey, 'anything_n_rows': anything_n_rows, 'n_rows': n_rows}, orderings=[(anything_c_custkey):asc_first]) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_custkey': ANYTHING(c_custkey), 'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 0ac7dc610..3fbeb60a7 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', 
DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'sum_n_rows': sum_n_rows, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'p_retailprice': t1.p_retailprice, 's_acctbal': t1.s_acctbal}) +ROOT(columns=[('key', anything_o_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (anything_o_orderkey):asc_first], limit=5:numeric) + 
FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'anything_o_orderkey': anything_o_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'sum_n_rows': sum_n_rows, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.anything_o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'anything_o_orderkey': t0.anything_o_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'anything_o_orderkey': ANYTHING(o_orderkey), 'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'p_retailprice': t1.p_retailprice, 's_acctbal': t1.s_acctbal}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t1.n_rows, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 9fe75904a..32a6e4466 100644 --- 
a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) - AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.avg_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) +ROOT(columns=[('n', ndistinct_anything_s_suppkey)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_anything_s_suppkey': NDISTINCT(anything_s_suppkey)}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.avg_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey}) + JOIN(condition=t0.anything_s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey, 'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_s_suppkey': ANYTHING(s_suppkey), 'avg_p_retailprice': AVG(p_retailprice)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, 
type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey, 's_suppkey': t0.s_suppkey}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 2f8b7cb03..82b371c39 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) - AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'supplier_avg_price': t0.supplier_avg_price}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'supplier_avg_price': AVG(p_retailprice)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 
'ps_suppkey': t1.ps_suppkey}) +ROOT(columns=[('n', ndistinct_anything_s_suppkey)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_anything_s_suppkey': NDISTINCT(anything_s_suppkey)}) + JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey}) + JOIN(condition=t0.anything_s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'anything_s_suppkey': t0.anything_s_suppkey, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 'supplier_avg_price': t0.supplier_avg_price}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'anything_s_suppkey': ANYTHING(s_suppkey), 'supplier_avg_price': AVG(p_retailprice)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 36c3bd12a..2cb18dfd3 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,8 +1,8 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), 
('nation_name', anything_anything_anything_n_name), ('n_above_avg_customers', anything_anything_n_rows), ('n_above_avg_suppliers', agg_3_14), ('min_cust_acctbal', anything_min_c_acctbal), ('max_cust_acctbal', anything_max_c_acctbal)], orderings=[(anything_anything_anything_n_regionkey):asc_first, (anything_anything_anything_n_name):asc_first]) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t0.c_acctbal, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_rows': t0.n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 
'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) + JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'anything_anything_n_regionkey': t0.anything_anything_n_regionkey, 'anything_n_rows': t0.anything_n_rows, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_nationkey': t1.n_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -12,9 +12,9 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_n SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 
'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index a9718cf64..680f69526 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', 
anything_n_rows)], orderings=[(anything_lower_r_name):asc_first, (anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_lower_r_name': ANYTHING(LOWER(r_name)), 'anything_n_name': ANYTHING(n_name), 'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'r_name': t0.r_name}) +ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_above_avg_suppliers)], orderings=[(anything_lower_r_name):asc_first, (anything_n_name):asc_first]) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_lower_r_name': ANYTHING(LOWER(r_name)), 'anything_n_name': ANYTHING(n_name), 'n_above_avg_suppliers': ANYTHING(COUNT()), 'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('region_name', 
anything_lower_r_name), ('nation_name', anything_n FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={}) JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index e30507b7e..30ca37e9a 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', anything_c_name), ('largest_diff', IFF(ABS(min_revenue_delta) > max_revenue_delta, min_revenue_delta, max_revenue_delta))], orderings=[(IFF(ABS(min_revenue_delta) > max_revenue_delta, min_revenue_delta, max_revenue_delta)):desc_last], limit=5:numeric) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'max_revenue_delta': MAX(revenue_delta), 'min_revenue_delta': MIN(revenue_delta)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'revenue_delta': t1.revenue_delta}) - FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) - PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) + PROJECT(columns={'c_name': c_name, 'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'sum_r': t1.sum_r}) + FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 44171bcf6..cac47c047 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ 
b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,7 +1,7 @@ ROOT(columns=[('year', year_o_orderdate), ('month', month_o_orderdate)], orderings=[(year_o_orderdate):asc_first, (month_o_orderdate):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_o_orderdate):asc_last, (month_o_orderdate):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_o_orderdate):asc_last, (month_o_orderdate):asc_last], default=0.0), columns={'month_o_orderdate': month_o_orderdate, 'year_o_orderdate': year_o_orderdate}) AGGREGATE(keys={'month_o_orderdate': MONTH(o_orderdate), 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - JOIN(condition=t0.year_o_orderdate == t1.year_o_orderdate, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.year_o_orderdate == YEAR(t1.o_orderdate), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > next_year_total_spent, columns={'year_o_orderdate': year_o_orderdate}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_o_orderdate):asc_last], default=0.0), 'sum_o_totalprice': sum_o_totalprice, 'year_o_orderdate': year_o_orderdate}) AGGREGATE(keys={'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 11e969ab7..276e245d2 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ 
b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,30 +1,30 @@ -ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', sum_anything_sum_sbTxShares / sum_anything_count_sbTxShares), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) - JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_anything_sum_sbTxShares / t0.sum_anything_count_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 
'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_anything_count_sbTxShares': t1.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t1.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) +ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', sum_sum_sbTxShares_1 / sum_count_sbTxShares_1), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + 
JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares_1 / t0.sum_count_sbTxShares_1 & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': 
t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t1.sum_sum_sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.anything_sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'anything_sbTxType': ANYTHING(sbTxType)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.anything_sbTxCustId_0 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.anything_sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'anything_sbTxCustId_0': ANYTHING(sbTxCustId_0), 'anything_sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': 
t0.sbTxCustId, 'sbTxCustId_0': t1.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_anything_count_sbTxShares': t1.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t1.sum_anything_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_anything_count_sbTxShares': SUM(anything_count_sbTxShares), 'sum_anything_sum_sbTxShares': SUM(anything_sum_sbTxShares)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'anything_count_sbTxShares': 
ANYTHING(count_sbTxShares), 'anything_sum_sbTxShares': ANYTHING(sum_sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_sbTxShares': t1.sum_sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': ANYTHING(COUNT(sbTxShares)), 'sum_sbTxShares': ANYTHING(SUM(sbTxShares))}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'avg_sbTxShares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, 
columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index c284ce3c8..b21a4a90e 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.anything_max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'anything_max_sbTxShares': ANYTHING(max_sbTxShares), 'anything_sbTxTickerId': ANYTHING(sbTxTickerId), 'max_max_sbTxShares': MAX(sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t0.sbTxShares, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'anything_sbTxTickerId': ANYTHING(sbTxTickerId), 'max_max_sbTxShares': MAX(sbTxShares), 'max_sbTxShares': ANYTHING(MAX(sbTxShares))}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, 
columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t0.sbTxShares, 'sbTxTickerId': t1.sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index c6e5c9a16..488f4fd87 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', anything_anything_p_name), ('qty_95', DEFAULT_TO(anything_sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(anything_sum_l_quantity, 0:numeric)):desc_last, (anything_anything_p_name):asc_first], limit=3:numeric) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'agg_1': SUM(l_quantity), 'anything_anything_p_name': ANYTHING(anything_p_name), 'anything_sum_l_quantity': ANYTHING(sum_l_quantity)}) - JOIN(condition=t0.l_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t0.anything_p_name, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'sum_l_quantity': t0.sum_l_quantity}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'sum_l_quantity': SUM(l_quantity)}) - 
JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_name': t0.p_name}) + JOIN(condition=t0.anything_p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t0.anything_p_name, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'sum_l_quantity': t0.sum_l_quantity}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_p_partkey': ANYTHING(p_partkey), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 7100b288a..5d72b9bb2 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -5,10 +5,10 @@ ROOT(columns=[('S_NAME', anything_s_name), ('S_ADDRESS', anything_s_address)], o SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 
0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.anything_p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_partkey': ANYTHING(p_partkey), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_partkey': t0.p_partkey}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) diff --git a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index 011898756..cea605abf 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,6 +1,6 @@ WITH _s4 AS ( SELECT - partsupp.ps_suppkey, + MAX(supplier.s_suppkey) AS anything_s_suppkey, AVG(part.p_retailprice) AS avg_p_retailprice FROM tpch.supplier AS supplier JOIN tpch.partsupp AS partsupp @@ -10,13 +10,13 @@ WITH _s4 AS ( WHERE supplier.s_acctbal < 1000 AND supplier.s_nationkey = 19 GROUP BY - 1 + partsupp.ps_suppkey ) SELECT - 
COUNT(DISTINCT _s4.ps_suppkey) AS n + COUNT(DISTINCT _s4.anything_s_suppkey) AS n FROM _s4 AS _s4 JOIN tpch.partsupp AS partsupp - ON _s4.ps_suppkey = partsupp.ps_suppkey + ON _s4.anything_s_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part ON _s4.avg_p_retailprice > part.p_retailprice AND part.p_container = 'LG DRUM' diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index f65eaa980..10c89c472 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -4,8 +4,8 @@ WITH _s0 AS ( FROM tpch.part ), _s6 AS ( SELECT - partsupp.ps_suppkey, MAX(_s0.avg_p_retailprice) AS anything_avg_p_retailprice, + MAX(supplier.s_suppkey) AS anything_s_suppkey, AVG(part.p_retailprice) AS supplier_avg_price FROM _s0 AS _s0 JOIN tpch.supplier AS supplier @@ -15,13 +15,13 @@ WITH _s0 AS ( JOIN tpch.part AS part ON part.p_partkey = partsupp.ps_partkey GROUP BY - 1 + partsupp.ps_suppkey ) SELECT - COUNT(DISTINCT _s6.ps_suppkey) AS n + COUNT(DISTINCT _s6.anything_s_suppkey) AS n FROM _s6 AS _s6 JOIN tpch.partsupp AS partsupp - ON _s6.ps_suppkey = partsupp.ps_suppkey + ON _s6.anything_s_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part ON _s6.supplier_avg_price > part.p_retailprice AND part.p_container = 'LG DRUM' diff --git a/tests/test_sql_refsols/correl_29_sqlite.sql b/tests/test_sql_refsols/correl_29_sqlite.sql index ab54a3144..8e3599f4b 100644 --- a/tests/test_sql_refsols/correl_29_sqlite.sql +++ b/tests/test_sql_refsols/correl_29_sqlite.sql @@ -23,50 +23,50 @@ WITH _t5 AS ( ON _s1.avg_c_acctbal < _s3.c_acctbal AND _s3.c_nationkey = nation.n_nationkey GROUP BY 1 -), _t6 AS ( - SELECT - s_acctbal, - s_nationkey - FROM tpch.supplier -), _s7 AS ( - SELECT - s_nationkey, - AVG(s_acctbal) AS avg_s_acctbal - FROM _t6 - GROUP BY - 1 -), _t1 AS ( +), _s10 AS ( SELECT - _s5.c_nationkey, MAX(_t3.anything_n_name) AS anything_anything_n_name, MAX(_t3.anything_n_regionkey) AS 
anything_anything_n_regionkey, + MAX(_t3.n_nationkey) AS anything_n_nationkey, MAX(_t3.n_rows) AS anything_n_rows, MAX(_s5.c_acctbal) AS max_c_acctbal, MIN(_s5.c_acctbal) AS min_c_acctbal FROM _t3 AS _t3 JOIN _t5 AS _s5 ON _s5.c_nationkey = _t3.n_nationkey - JOIN tpch.nation AS nation - ON _s5.c_nationkey = nation.n_nationkey - JOIN _s7 AS _s7 - ON _s7.s_nationkey = nation.n_nationkey - JOIN _t6 AS _s9 - ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey WHERE _t3.anything_n_regionkey IN (1, 3) + GROUP BY + _s5.c_nationkey +), _t6 AS ( + SELECT + s_acctbal, + s_nationkey + FROM tpch.supplier +), _s7 AS ( + SELECT + s_nationkey, + AVG(s_acctbal) AS avg_s_acctbal + FROM _t6 GROUP BY 1 ) SELECT - MAX(anything_anything_n_regionkey) AS region_key, - MAX(anything_anything_n_name) AS nation_name, - MAX(anything_n_rows) AS n_above_avg_customers, + MAX(_s10.anything_anything_n_regionkey) AS region_key, + MAX(_s10.anything_anything_n_name) AS nation_name, + MAX(_s10.anything_n_rows) AS n_above_avg_customers, COUNT(*) AS n_above_avg_suppliers, - MAX(min_c_acctbal) AS min_cust_acctbal, - MAX(max_c_acctbal) AS max_cust_acctbal -FROM _t1 + MAX(_s10.min_c_acctbal) AS min_cust_acctbal, + MAX(_s10.max_c_acctbal) AS max_cust_acctbal +FROM _s10 AS _s10 +JOIN tpch.nation AS nation + ON _s10.anything_n_nationkey = nation.n_nationkey +JOIN _s7 AS _s7 + ON _s7.s_nationkey = nation.n_nationkey +JOIN _t6 AS _s9 + ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey GROUP BY - c_nationkey + nation.n_nationkey ORDER BY 1, 2 diff --git a/tests/test_sql_refsols/correl_30_sqlite.sql b/tests/test_sql_refsols/correl_30_sqlite.sql index e94f9a71c..f8b25e24b 100644 --- a/tests/test_sql_refsols/correl_30_sqlite.sql +++ b/tests/test_sql_refsols/correl_30_sqlite.sql @@ -30,9 +30,8 @@ WITH _t2 AS ( GROUP BY 1 ), _s13 AS ( - SELECT - nation.n_nationkey, - COUNT(*) AS n_rows + SELECT DISTINCT + nation.n_nationkey FROM tpch.nation AS nation JOIN _s7 AS 
_s7 ON _s7.s_nationkey = nation.n_nationkey @@ -40,14 +39,12 @@ WITH _t2 AS ( ON _t6.r_regionkey = nation.n_regionkey JOIN _t5 AS _s11 ON _s11.s_acctbal > _s7.avg_s_acctbal AND _s11.s_nationkey = nation.n_nationkey - GROUP BY - 1 ) SELECT MAX(LOWER(_t3.r_name)) AS region_name, MAX(nation.n_name) AS nation_name, COUNT(*) AS n_above_avg_customers, - MAX(_s13.n_rows) AS n_above_avg_suppliers + MAX(COUNT(*)) AS n_above_avg_suppliers FROM tpch.nation AS nation JOIN _s1 AS _s1 ON _s1.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql index f3eae3bdb..5e419b3c9 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql @@ -1,9 +1,9 @@ SELECT - wallet_transactions_daily.sender_id AS user_id, + ANY_VALUE(users.uid) AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - 1 + wallet_transactions_daily.sender_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql index f3eae3bdb..5e419b3c9 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql @@ -1,9 +1,9 @@ SELECT - wallet_transactions_daily.sender_id AS user_id, + ANY_VALUE(users.uid) AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - 1 + wallet_transactions_daily.sender_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql index f3eae3bdb..b12f30f70 100644 --- 
a/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql @@ -1,9 +1,9 @@ SELECT - wallet_transactions_daily.sender_id AS user_id, + MAX(users.uid) AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - 1 + wallet_transactions_daily.sender_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql index f3eae3bdb..5e419b3c9 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql @@ -1,9 +1,9 @@ SELECT - wallet_transactions_daily.sender_id AS user_id, + ANY_VALUE(users.uid) AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - 1 + wallet_transactions_daily.sender_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql index f3eae3bdb..b12f30f70 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql @@ -1,9 +1,9 @@ SELECT - wallet_transactions_daily.sender_id AS user_id, + MAX(users.uid) AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - 1 + wallet_transactions_daily.sender_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql index 185e648da..54888423e 100644 --- 
a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql @@ -1,5 +1,5 @@ SELECT - user_sessions.user_id AS uid, + ANY_VALUE(users.uid) AS uid, SUM( DATEDIFF( CAST(user_sessions.session_end_ts AS DATETIME), @@ -13,6 +13,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - 1 + user_sessions.user_id ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql index 41b3428dd..beecc5327 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql @@ -1,5 +1,5 @@ SELECT - user_sessions.user_id AS uid, + ANY_VALUE(users.uid) AS uid, SUM( TIMESTAMPDIFF(SECOND, user_sessions.session_start_ts, user_sessions.session_end_ts) ) AS total_duration @@ -9,6 +9,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - 1 + user_sessions.user_id ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql index e123f50d6..03f38d12f 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql @@ -1,5 +1,5 @@ SELECT - user_sessions.user_id AS uid, + MAX(users.uid) AS uid, SUM( EXTRACT(EPOCH FROM ( CAST(user_sessions.session_end_ts AS TIMESTAMP) - CAST(user_sessions.session_start_ts AS TIMESTAMP) @@ -11,6 +11,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - 1 + user_sessions.user_id ORDER BY 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql index 7d8defbb6..ad121c7f4 100644 --- 
a/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql @@ -1,5 +1,5 @@ SELECT - user_sessions.user_id AS uid, + ANY_VALUE(users.uid) AS uid, SUM( DATEDIFF( SECOND, @@ -13,6 +13,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - 1 + user_sessions.user_id ORDER BY 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql index 508a4685b..f26571a07 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql @@ -1,5 +1,5 @@ SELECT - user_sessions.user_id AS uid, + MAX(users.uid) AS uid, SUM( ( ( @@ -15,6 +15,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - 1 + user_sessions.user_id ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql index 1d96449b8..de3b98e6b 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql @@ -1,5 +1,5 @@ SELECT - wallet_transactions_daily.receiver_id AS merchants_id, + ANY_VALUE(merchants.mid) AS merchants_id, ANY_VALUE(merchants.name) AS merchants_name, ANY_VALUE(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - 1 + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql index 8519016f7..b341dbba8 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql 
+++ b/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql @@ -1,5 +1,5 @@ SELECT - wallet_transactions_daily.receiver_id AS merchants_id, + ANY_VALUE(merchants.mid) AS merchants_id, ANY_VALUE(merchants.name) AS merchants_name, ANY_VALUE(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - 1 + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql index 33e2f6661..4b1d90d67 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql @@ -1,5 +1,5 @@ SELECT - wallet_transactions_daily.receiver_id AS merchants_id, + MAX(merchants.mid) AS merchants_id, MAX(merchants.name) AS merchants_name, MAX(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - 1 + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql index 8519016f7..b341dbba8 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql @@ -1,5 +1,5 @@ SELECT - wallet_transactions_daily.receiver_id AS merchants_id, + ANY_VALUE(merchants.mid) AS merchants_id, ANY_VALUE(merchants.name) AS merchants_name, ANY_VALUE(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily 
AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - 1 + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql index 33e2f6661..4b1d90d67 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql @@ -1,5 +1,5 @@ SELECT - wallet_transactions_daily.receiver_id AS merchants_id, + MAX(merchants.mid) AS merchants_id, MAX(merchants.name) AS merchants_name, MAX(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - 1 + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_mysql.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_mysql.sql index 463f75371..85a03e7e5 100644 --- a/tests/test_sql_refsols/nation_acctbal_breakdown_mysql.sql +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_mysql.sql @@ -1,61 +1,62 @@ -WITH _s3 AS ( +WITH _t1 AS ( SELECT - c_acctbal, - c_nationkey, + CUSTOMER.c_acctbal, + CUSTOMER.c_nationkey, + NATION.n_name, CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY c_nationkey ORDER BY CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END DESC) - 1.0 + ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY CASE WHEN CUSTOMER.c_acctbal >= 0 THEN CUSTOMER.c_acctbal ELSE NULL END DESC) - 1.0 ) - ( ( - COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) OVER (PARTITION BY c_nationkey) - 1.0 + COUNT(CASE WHEN CUSTOMER.c_acctbal >= 0 THEN CUSTOMER.c_acctbal ELSE NULL END) OVER (PARTITION BY CUSTOMER.c_nationkey) - 1.0 ) / 2.0 ) ) < 1.0 - THEN CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END + THEN CASE WHEN CUSTOMER.c_acctbal >= 
0 THEN CUSTOMER.c_acctbal ELSE NULL END ELSE NULL END AS expr_5, CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY c_nationkey ORDER BY c_acctbal DESC) - 1.0 + ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY CUSTOMER.c_acctbal DESC) - 1.0 ) - ( ( - COUNT(c_acctbal) OVER (PARTITION BY c_nationkey) - 1.0 + COUNT(CUSTOMER.c_acctbal) OVER (PARTITION BY CUSTOMER.c_nationkey) - 1.0 ) / 2.0 ) ) < 1.0 - THEN c_acctbal + THEN CUSTOMER.c_acctbal ELSE NULL END AS expr_6, CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY c_nationkey ORDER BY CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END DESC) - 1.0 + ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY CASE WHEN CUSTOMER.c_acctbal < 0 THEN CUSTOMER.c_acctbal ELSE NULL END DESC) - 1.0 ) - ( ( - COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) OVER (PARTITION BY c_nationkey) - 1.0 + COUNT(CASE WHEN CUSTOMER.c_acctbal < 0 THEN CUSTOMER.c_acctbal ELSE NULL END) OVER (PARTITION BY CUSTOMER.c_nationkey) - 1.0 ) / 2.0 ) ) < 1.0 - THEN CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END + THEN CASE WHEN CUSTOMER.c_acctbal < 0 THEN CUSTOMER.c_acctbal ELSE NULL END ELSE NULL END AS expr_7 - FROM tpch.CUSTOMER + FROM tpch.NATION AS NATION + JOIN tpch.REGION AS REGION + ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'AMERICA' + JOIN tpch.CUSTOMER AS CUSTOMER + ON CUSTOMER.c_nationkey = NATION.n_nationkey ) SELECT - ANY_VALUE(NATION.n_name) COLLATE utf8mb4_bin AS nation_name, - COUNT(CASE WHEN _s3.c_acctbal < 0 THEN _s3.c_acctbal ELSE NULL END) AS n_red_acctbal, - COUNT(CASE WHEN _s3.c_acctbal >= 0 THEN _s3.c_acctbal ELSE NULL END) AS n_black_acctbal, - AVG(_s3.expr_7) AS median_red_acctbal, - AVG(_s3.expr_5) AS median_black_acctbal, - AVG(_s3.expr_6) AS median_overall_acctbal -FROM tpch.NATION AS NATION -JOIN tpch.REGION AS REGION - ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'AMERICA' -JOIN _s3 AS _s3 - ON NATION.n_nationkey = _s3.c_nationkey + 
ANY_VALUE(n_name) COLLATE utf8mb4_bin AS nation_name, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, + AVG(expr_7) AS median_red_acctbal, + AVG(expr_5) AS median_black_acctbal, + AVG(expr_6) AS median_overall_acctbal +FROM _t1 GROUP BY - _s3.c_nationkey + c_nationkey ORDER BY 1 diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql index 822eced2d..a64ebcc8e 100644 --- a/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql @@ -1,61 +1,62 @@ -WITH _s3 AS ( +WITH _t1 AS ( SELECT - c_acctbal, - c_nationkey, + customer.c_acctbal, + customer.c_nationkey, + nation.n_name, CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY c_nationkey ORDER BY CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END DESC) - 1.0 + ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END DESC) - 1.0 ) - ( CAST(( - COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) OVER (PARTITION BY c_nationkey) - 1.0 + COUNT(CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) OVER (PARTITION BY customer.c_nationkey) - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 - THEN CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END + THEN CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END ELSE NULL END AS expr_5, CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY c_nationkey ORDER BY c_acctbal DESC) - 1.0 + ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal DESC) - 1.0 ) - ( CAST(( - COUNT(c_acctbal) OVER (PARTITION BY c_nationkey) - 1.0 + COUNT(customer.c_acctbal) OVER (PARTITION BY customer.c_nationkey) - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 - THEN c_acctbal + THEN customer.c_acctbal ELSE NULL END AS expr_6, CASE WHEN 
ABS( ( - ROW_NUMBER() OVER (PARTITION BY c_nationkey ORDER BY CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END DESC) - 1.0 + ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END DESC) - 1.0 ) - ( CAST(( - COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) OVER (PARTITION BY c_nationkey) - 1.0 + COUNT(CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) OVER (PARTITION BY customer.c_nationkey) - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 - THEN CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END + THEN CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END ELSE NULL END AS expr_7 - FROM tpch.customer + FROM tpch.nation AS nation + JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AMERICA' + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey ) SELECT - MAX(nation.n_name) AS nation_name, - COUNT(CASE WHEN _s3.c_acctbal < 0 THEN _s3.c_acctbal ELSE NULL END) AS n_red_acctbal, - COUNT(CASE WHEN _s3.c_acctbal >= 0 THEN _s3.c_acctbal ELSE NULL END) AS n_black_acctbal, - AVG(_s3.expr_7) AS median_red_acctbal, - AVG(_s3.expr_5) AS median_black_acctbal, - AVG(_s3.expr_6) AS median_overall_acctbal -FROM tpch.nation AS nation -JOIN tpch.region AS region - ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AMERICA' -JOIN _s3 AS _s3 - ON _s3.c_nationkey = nation.n_nationkey + MAX(n_name) AS nation_name, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, + AVG(expr_7) AS median_red_acctbal, + AVG(expr_5) AS median_black_acctbal, + AVG(expr_6) AS median_overall_acctbal +FROM _t1 GROUP BY - _s3.c_nationkey + c_nationkey ORDER BY 1 diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 090832727..1b5d6ec54 100644 --- 
a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - lineitem.l_partkey, + ANY_VALUE(part.p_partkey) AS anything_p_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE part.p_name LIKE 'forest%' GROUP BY - 1 + lineitem.l_partkey ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.l_partkey = partsupp.ps_partkey + ON _s5.anything_p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_mysql.sql b/tests/test_sql_refsols/tpch_q20_mysql.sql index fb35009f6..284ef586d 100644 --- a/tests/test_sql_refsols/tpch_q20_mysql.sql +++ b/tests/test_sql_refsols/tpch_q20_mysql.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - LINEITEM.l_partkey, + ANY_VALUE(PART.p_partkey) AS anything_p_partkey, SUM(LINEITEM.l_quantity) AS sum_l_quantity FROM tpch.PART AS PART JOIN tpch.LINEITEM AS LINEITEM @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE PART.p_name LIKE 'forest%' GROUP BY - 1 + LINEITEM.l_partkey ) SELECT ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, @@ -23,7 +23,7 @@ JOIN _s5 AS _s5 ON PARTSUPP.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) - AND PARTSUPP.ps_partkey = _s5.l_partkey + AND PARTSUPP.ps_partkey = _s5.anything_p_partkey GROUP BY PARTSUPP.ps_suppkey ORDER BY diff --git a/tests/test_sql_refsols/tpch_q20_postgres.sql b/tests/test_sql_refsols/tpch_q20_postgres.sql index 8da59668e..3f261a9bf 100644 --- a/tests/test_sql_refsols/tpch_q20_postgres.sql +++ b/tests/test_sql_refsols/tpch_q20_postgres.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - lineitem.l_partkey, + MAX(part.p_partkey) AS anything_p_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem 
AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE part.p_name LIKE 'forest%' GROUP BY - 1 + lineitem.l_partkey ) SELECT MAX(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.l_partkey = partsupp.ps_partkey + ON _s5.anything_p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_snowflake.sql b/tests/test_sql_refsols/tpch_q20_snowflake.sql index ec5447f59..cc2d0c469 100644 --- a/tests/test_sql_refsols/tpch_q20_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q20_snowflake.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - lineitem.l_partkey, + ANY_VALUE(part.p_partkey) AS anything_p_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE STARTSWITH(part.p_name, 'forest') GROUP BY - 1 + lineitem.l_partkey ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.l_partkey = partsupp.ps_partkey + ON _s5.anything_p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index 065135776..46f3cd2d7 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - lineitem.l_partkey, + MAX(part.p_partkey) AS anything_p_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE part.p_name LIKE 'forest%' GROUP BY - 1 + lineitem.l_partkey ) SELECT MAX(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON 
partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.l_partkey = partsupp.ps_partkey + ON _s5.anything_p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) From 4ad8255f1f9fcb2c5ef705b5028fa16d812a6f12 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 20:19:43 -0400 Subject: [PATCH 104/143] Fixing join bug --- .../conversion/join_aggregate_transpose.py | 4 +++- tests/test_plan_refsols/correl_30.txt | 8 +++---- .../multi_partition_access_2.txt | 24 +++++++++---------- .../multi_partition_access_4.txt | 10 ++++---- tests/test_sql_refsols/correl_30_sqlite.sql | 9 ++++--- 5 files changed, 30 insertions(+), 25 deletions(-) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index f42f79446..7c4a6208a 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -204,7 +204,9 @@ def join_aggregate_transpose( for col_name, col_expr in non_agg_input.columns.items(): join_name = self.generate_name(col_name, new_join_columns) - new_join_columns[join_name] = add_input_name(col_expr, non_agg_alias) + new_join_columns[join_name] = ColumnReference( + col_name, col_expr.data_type, non_agg_alias + ) agg_name = self.generate_name(col_name, new_agg_names) new_aggregate_aggs[agg_name] = CallExpression( pydop.ANYTHING, diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 680f69526..a9718cf64 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_above_avg_suppliers)], orderings=[(anything_lower_r_name):asc_first, (anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_lower_r_name': ANYTHING(LOWER(r_name)), 
'anything_n_name': ANYTHING(n_name), 'n_above_avg_suppliers': ANYTHING(COUNT()), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) +ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', anything_n_rows)], orderings=[(anything_lower_r_name):asc_first, (anything_n_name):asc_first]) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_lower_r_name': ANYTHING(LOWER(r_name)), 'anything_n_name': ANYTHING(n_name), 'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, 
columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 276e245d2..ca8e826f9 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', sum_sum_sbTxShares_1 / sum_count_sbTxShares_1), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 
'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares_1 / t0.sum_count_sbTxShares_1 & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 
'sum_count_sbTxShares_1': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t1.sum_sum_sbTxShares}) +ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', sum_anything_sum_sbTxShares / sum_anything_count_sbTxShares), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) + JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_anything_sum_sbTxShares / t0.sum_anything_count_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': 
t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_anything_count_sbTxShares': t1.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t1.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) @@ -16,15 +16,15 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) 
SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_anything_count_sbTxShares': t1.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t1.sum_anything_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': ANYTHING(COUNT(sbTxShares)), 'sum_sbTxShares': ANYTHING(SUM(sbTxShares))}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_anything_count_sbTxShares': SUM(anything_count_sbTxShares), 'sum_anything_sum_sbTxShares': SUM(anything_sum_sbTxShares)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': 
sbTxType}, aggregations={'anything_count_sbTxShares': ANYTHING(count_sbTxShares), 'anything_sum_sbTxShares': ANYTHING(sum_sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_sbTxShares': t1.sum_sbTxShares}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'avg_sbTxShares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index b21a4a90e..c284ce3c8 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('transaction_id', sbTxId)], 
orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'anything_sbTxTickerId': ANYTHING(sbTxTickerId), 'max_max_sbTxShares': MAX(sbTxShares), 'max_sbTxShares': ANYTHING(MAX(sbTxShares))}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t0.sbTxShares, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.anything_max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'anything_max_sbTxShares': ANYTHING(max_sbTxShares), 'anything_sbTxTickerId': ANYTHING(sbTxTickerId), 'max_max_sbTxShares': MAX(sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t0.sbTxShares, 'sbTxTickerId': t1.sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': 
sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_sql_refsols/correl_30_sqlite.sql b/tests/test_sql_refsols/correl_30_sqlite.sql index f8b25e24b..e94f9a71c 100644 --- a/tests/test_sql_refsols/correl_30_sqlite.sql +++ b/tests/test_sql_refsols/correl_30_sqlite.sql @@ -30,8 +30,9 @@ WITH _t2 AS ( GROUP BY 1 ), _s13 AS ( - SELECT DISTINCT - nation.n_nationkey + SELECT + nation.n_nationkey, + COUNT(*) AS n_rows FROM tpch.nation AS nation JOIN _s7 AS _s7 ON _s7.s_nationkey = nation.n_nationkey @@ -39,12 +40,14 @@ WITH _t2 AS ( ON _t6.r_regionkey = nation.n_regionkey JOIN _t5 AS _s11 ON _s11.s_acctbal > _s7.avg_s_acctbal AND _s11.s_nationkey = nation.n_nationkey + GROUP BY + 1 ) SELECT MAX(LOWER(_t3.r_name)) AS region_name, MAX(nation.n_name) AS nation_name, COUNT(*) AS n_above_avg_customers, - MAX(COUNT(*)) AS n_above_avg_suppliers + MAX(_s13.n_rows) AS n_above_avg_suppliers FROM tpch.nation AS nation JOIN _s1 AS _s1 ON _s1.c_nationkey = nation.n_nationkey From ee93d51bc335f0f742a8ed6aaed5812900d88d63 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 20:26:25 -0400 Subject: [PATCH 105/143] Adding back the join key switch --- .../conversion/join_aggregate_transpose.py | 114 +++--------------- tests/test_plan_refsols/common_prefix_ad.txt | 16 +-- tests/test_plan_refsols/common_prefix_al.txt | 12 +- tests/test_plan_refsols/common_prefix_n.txt | 10 +- tests/test_plan_refsols/correl_14.txt | 12 +- tests/test_plan_refsols/correl_15.txt | 12 +- tests/test_plan_refsols/correl_29.txt | 20 +-- .../multi_partition_access_2.txt | 6 +- .../parts_quantity_increase_95_96.txt | 6 +- tests/test_plan_refsols/tpch_q20.txt | 6 +- tests/test_sql_refsols/correl_14_sqlite.sql | 8 +- tests/test_sql_refsols/correl_15_sqlite.sql | 8 +- tests/test_sql_refsols/correl_29_sqlite.sql | 54 ++++----- .../defog_ewallet_adv10_ansi.sql | 4 +- 
.../defog_ewallet_adv10_mysql.sql | 4 +- .../defog_ewallet_adv10_postgres.sql | 4 +- .../defog_ewallet_adv10_snowflake.sql | 4 +- .../defog_ewallet_adv10_sqlite.sql | 4 +- .../defog_ewallet_adv11_ansi.sql | 4 +- .../defog_ewallet_adv11_mysql.sql | 4 +- .../defog_ewallet_adv11_postgres.sql | 4 +- .../defog_ewallet_adv11_snowflake.sql | 4 +- .../defog_ewallet_adv11_sqlite.sql | 4 +- .../defog_ewallet_adv8_ansi.sql | 4 +- .../defog_ewallet_adv8_mysql.sql | 4 +- .../defog_ewallet_adv8_postgres.sql | 4 +- .../defog_ewallet_adv8_snowflake.sql | 4 +- .../defog_ewallet_adv8_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q20_ansi.sql | 6 +- tests/test_sql_refsols/tpch_q20_mysql.sql | 6 +- tests/test_sql_refsols/tpch_q20_postgres.sql | 6 +- tests/test_sql_refsols/tpch_q20_snowflake.sql | 6 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 6 +- 33 files changed, 146 insertions(+), 228 deletions(-) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index 7c4a6208a..2e89faed4 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -23,6 +23,7 @@ from pydough.relational.rel_util import ( add_input_name, apply_substitution, + extract_equijoin_keys, ) @@ -144,6 +145,7 @@ def join_aggregate_transpose( ): return None + # TODO ADD COMMENTS agg_alias: str | None = ( join.default_input_aliases[0] if is_left else join.default_input_aliases[1] ) @@ -160,10 +162,7 @@ def join_aggregate_transpose( else: new_cardinality = new_cardinality.add_plural() - # A mapping that will be used to map every expression with regards to - # the original join looking at its input expressions to what the - # expression will be in the output columns of the new aggregate - + # TODO ADD COMMENTS new_join_columns: dict[str, RelationalExpression] = {} new_aggregate_keys: dict[str, RelationalExpression] = dict(aggregate.keys) new_aggregate_aggs: dict[str, CallExpression] = dict(aggregate.aggregations) 
@@ -175,6 +174,7 @@ def join_aggregate_transpose( add_input_name(key_expr, agg_alias) ) + # TODO ADD COMMENTS agg_input: RelationalNode = aggregate.inputs[0] non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0] new_join_inputs: list[RelationalNode] = ( @@ -202,6 +202,7 @@ def join_aggregate_transpose( ColumnReference(col_name, col_expr.data_type) ) + # TODO ADD COMMENTS for col_name, col_expr in non_agg_input.columns.items(): join_name = self.generate_name(col_name, new_join_columns) new_join_columns[join_name] = ColumnReference( @@ -218,80 +219,19 @@ def join_aggregate_transpose( ColumnReference(agg_name, col_expr.data_type) ) - # for key_name, key_expr in aggregate.keys.items(): - # agg_name: str = self.generate_name(key_name, new_agg_names) - # new_aggregate_keys[agg_name] = ColumnReference( - # self.generate_name(key_name, new_join_columns), - # key_expr.data_type, - # ) - # new_agg_names.add(agg_name) - - # join_name: str - # agg_name: str - - # agg_columns_remapped: dict[RelationalExpression, RelationalExpression] = {} - # agg_key_names: dict[str, str] = {} - - # for col_name, col_expr in join.columns.items(): - # assert isinstance(col_expr, ColumnReference) - # join_name = self.generate_name(col_name, new_join_columns) - # agg_name = self.generate_name(col_name, new_agg_names) - # if col_expr.input_name == agg_alias: - # if col_expr.name in aggregate.keys: - # new_join_columns[join_name] = add_input_name( - # aggregate.keys[col_expr.name], agg_alias - # ) - # new_aggregate_keys[agg_name] = ColumnReference( - # join_name, col_expr.data_type - # ) - # agg_key_names[col_name] = agg_name - # agg_columns_remapped[aggregate.keys[col_expr.name]] = ( - # ColumnReference(join_name, col_expr.data_type) - # ) - # else: - # sub_agg_name: str - # current_agg: CallExpression = aggregate.aggregations[col_expr.name] - # for arg in current_agg.inputs: - # sub_agg_name = self.generate_name("expr", new_join_columns) - # new_join_columns[sub_agg_name] = 
add_input_name(arg, agg_alias) - # agg_columns_remapped[arg] = ColumnReference( - # sub_agg_name, arg.data_type - # ) - # new_call = apply_substitution( - # aggregate.aggregations[col_expr.name], agg_columns_remapped, {} - # ) - # assert isinstance(new_call, CallExpression) - # new_aggregate_aggs[agg_name] = new_call - # new_agg_names.add(agg_name) - # else: - # new_join_columns[join_name] = ColumnReference( - # col_expr.name, col_expr.data_type, non_agg_alias - # ) - # new_aggregate_aggs[agg_name] = CallExpression( - # pydop.ANYTHING, - # col_expr.data_type, - # [ColumnReference(join_name, col_expr.data_type)], - # ) - # new_agg_names.add(agg_name) - # project_columns[col_name] = ColumnReference(agg_name, col_expr.data_type) - - # for agg_key_name, agg_key_expr in aggregate.keys.items(): - # if agg_key_name not in new_aggregate_keys: - # join_name = self.generate_name(agg_key_name, new_join_columns) - # agg_name = self.generate_name(agg_key_name, new_agg_names) - # new_join_columns[join_name] = add_input_name(agg_key_expr, agg_alias) - # agg_key_names[agg_key_name] = agg_name - # new_aggregate_keys[agg_name] = ColumnReference( - # join_name, agg_key_expr.data_type - # ) - # new_agg_names.add(agg_name) - # join_sub[ - # ColumnReference(agg_key_name, agg_key_expr.data_type, agg_alias) - # ] = new_join_columns[join_name] + # For each join key from the non-aggregate side, alter its substitution + # to map it to the corresponding key from the aggregate side. 
+ agg_key_refs, non_agg_key_refs = extract_equijoin_keys(join) + if not is_left: + agg_key_refs, non_agg_key_refs = non_agg_key_refs, agg_key_refs + for agg_key, non_agg_key in zip(agg_key_refs, non_agg_key_refs): + join_sub[non_agg_key] = join_sub[agg_key] + # TODO ADD COMMENTS for col_name, col_expr in join.columns.items(): new_project_columns[col_name] = apply_substitution(col_expr, join_sub, {}) + # TODO ADD COMMENTS new_join: Join = Join( new_join_inputs, apply_substitution(join.condition, join_cond_sub, {}), @@ -302,36 +242,14 @@ def join_aggregate_transpose( join.correl_name, ) + # TODO ADD COMMENTS new_aggregate: Aggregate = Aggregate( new_join, new_aggregate_keys, new_aggregate_aggs ) - # # Create a mapping from the join keys on the non-aggregate side to those - # # on the aggregate side, so that the non-aggregate keys are not used - # # in the output. - # agg_key_refs, non_agg_key_refs = extract_equijoin_keys(join) - # if not is_left: - # agg_key_refs, non_agg_key_refs = non_agg_key_refs, agg_key_refs - - # rev_join_map: dict[RelationalExpression, str] = { - # expr: name for name, expr in join.columns.items() - # } - # for agg_key, non_agg_key in zip(agg_key_refs, non_agg_key_refs): - # agg_key_name_lookup: str = agg_key_names[agg_key.name] - # non_agg_key_name: str | None = rev_join_map.get(non_agg_key, None) - # if agg_key_name_lookup is not None and non_agg_key_name is not None: - # new_project_columns[non_agg_key_name] = ColumnReference( - # agg_key_name_lookup, agg_key.data_type - # ) - + # TODO ADD COMMENTS new_project: Project = Project(new_aggregate, new_project_columns) - print() - print(join.to_tree_string()) - - print() - print(new_project.to_tree_string()) - return new_project diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 46f5f7a1e..022d52ee5 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,11 +1,11 @@ 
-ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('part_qty', anything_ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.anything_ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'anything_ps_availqty': t1.anything_ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_ps_availqty': ANYTHING(ps_availqty), 'anything_ps_suppkey': ANYTHING(ps_suppkey), 'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) +ROOT(columns=[('supplier_name', anything_s_name), ('part_name', anything_p_name), ('part_qty', anything_ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(anything_s_name):asc_first]) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_ps_availqty': ANYTHING(ps_availqty), 'anything_s_name': ANYTHING(s_name), 'sum_l_quantity': SUM(l_quantity)}) + 
JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 7f3114112..77788a35d 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('cust_key', anything_anything_c_custkey), ('n_orders', 
DEFAULT_TO(anything_anything_n_rows, 0:numeric)), ('n_no_tax_discount', anything_n_rows)], orderings=[(anything_anything_c_custkey):asc_first]) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_anything_c_custkey': ANYTHING(anything_c_custkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_n_rows': ANYTHING(n_rows)}) - JOIN(condition=t0.anything_c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_c_custkey': t0.anything_c_custkey, 'anything_n_rows': t0.anything_n_rows, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) - LIMIT(limit=10:numeric, columns={'anything_c_custkey': anything_c_custkey, 'anything_n_rows': anything_n_rows, 'n_rows': n_rows}, orderings=[(anything_c_custkey):asc_first]) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_custkey': ANYTHING(c_custkey), 'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) +ROOT(columns=[('cust_key', o_custkey), ('n_orders', DEFAULT_TO(anything_anything_n_rows, 0:numeric)), ('n_no_tax_discount', anything_n_rows)], orderings=[(o_custkey):asc_first]) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_n_rows': ANYTHING(n_rows)}) + JOIN(condition=t0.o_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_rows': t0.anything_n_rows, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) + LIMIT(limit=10:numeric, columns={'anything_n_rows': anything_n_rows, 'n_rows': n_rows, 'o_custkey': o_custkey}, orderings=[(o_custkey):asc_first]) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) + 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 3fbeb60a7..0ac7dc610 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('key', anything_o_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (anything_o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'anything_o_orderkey': anything_o_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'sum_n_rows': sum_n_rows, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.anything_o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'anything_o_orderkey': t0.anything_o_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 
'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'anything_o_orderkey': ANYTHING(o_orderkey), 'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'p_retailprice': t1.p_retailprice, 's_acctbal': t1.s_acctbal}) +ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'sum_n_rows': sum_n_rows, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': 
MAX(s_acctbal), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'p_retailprice': t1.p_retailprice, 's_acctbal': t1.s_acctbal}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t1.n_rows, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 32a6e4466..9fe75904a 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('n', ndistinct_anything_s_suppkey)], orderings=[]) - AGGREGATE(keys={}, aggregations={'ndistinct_anything_s_suppkey': NDISTINCT(anything_s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.avg_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey}) - JOIN(condition=t0.anything_s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey, 'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_s_suppkey': ANYTHING(s_suppkey), 'avg_p_retailprice': AVG(p_retailprice)}) - 
JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey, 's_suppkey': t0.s_suppkey}) +ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.avg_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 82b371c39..2f8b7cb03 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('n', ndistinct_anything_s_suppkey)], orderings=[]) - AGGREGATE(keys={}, aggregations={'ndistinct_anything_s_suppkey': NDISTINCT(anything_s_suppkey)}) 
- JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_s_suppkey': t0.anything_s_suppkey}) - JOIN(condition=t0.anything_s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'anything_s_suppkey': t0.anything_s_suppkey, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 'supplier_avg_price': t0.supplier_avg_price}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'anything_s_suppkey': ANYTHING(s_suppkey), 'supplier_avg_price': AVG(p_retailprice)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey, 's_suppkey': t0.s_suppkey}) +ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) + JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'supplier_avg_price': t0.supplier_avg_price}) + 
AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'supplier_avg_price': AVG(p_retailprice)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 2cb18dfd3..36c3bd12a 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,8 +1,8 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_name', anything_anything_anything_n_name), ('n_above_avg_customers', anything_anything_n_rows), ('n_above_avg_suppliers', agg_3_14), ('min_cust_acctbal', anything_min_c_acctbal), ('max_cust_acctbal', anything_max_c_acctbal)], orderings=[(anything_anything_anything_n_regionkey):asc_first, (anything_anything_anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'anything_anything_n_regionkey': 
t0.anything_anything_n_regionkey, 'anything_n_rows': t0.anything_n_rows, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_nationkey': t1.n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t0.c_acctbal, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 
'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -12,9 +12,9 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_n SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index ca8e826f9..11e969ab7 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -8,9 +8,9 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - JOIN(condition=t0.anything_sbTxCustId_0 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.anything_sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'anything_sbTxCustId_0': ANYTHING(sbTxCustId_0), 'anything_sbTxType': ANYTHING(sbTxType)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxCustId_0': 
t1.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.anything_sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'anything_sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 488f4fd87..c6e5c9a16 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', anything_anything_p_name), ('qty_95', DEFAULT_TO(anything_sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(anything_sum_l_quantity, 0:numeric)):desc_last, (anything_anything_p_name):asc_first], limit=3:numeric) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'agg_1': SUM(l_quantity), 'anything_anything_p_name': ANYTHING(anything_p_name), 'anything_sum_l_quantity': ANYTHING(sum_l_quantity)}) - JOIN(condition=t0.anything_p_partkey == t1.l_partkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t0.anything_p_name, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'sum_l_quantity': t0.sum_l_quantity}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'anything_p_partkey': ANYTHING(p_partkey), 'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.l_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t0.anything_p_name, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'sum_l_quantity': t0.sum_l_quantity}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_name': t0.p_name}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 5d72b9bb2..7100b288a 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -5,10 +5,10 @@ ROOT(columns=[('S_NAME', anything_s_name), ('S_ADDRESS', anything_s_address)], o SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': 
s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.anything_p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'anything_p_partkey': ANYTHING(p_partkey), 'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'p_partkey': t0.p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) diff --git a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index cea605abf..011898756 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,6 +1,6 @@ WITH _s4 AS ( 
SELECT - MAX(supplier.s_suppkey) AS anything_s_suppkey, + partsupp.ps_suppkey, AVG(part.p_retailprice) AS avg_p_retailprice FROM tpch.supplier AS supplier JOIN tpch.partsupp AS partsupp @@ -10,13 +10,13 @@ WITH _s4 AS ( WHERE supplier.s_acctbal < 1000 AND supplier.s_nationkey = 19 GROUP BY - partsupp.ps_suppkey + 1 ) SELECT - COUNT(DISTINCT _s4.anything_s_suppkey) AS n + COUNT(DISTINCT _s4.ps_suppkey) AS n FROM _s4 AS _s4 JOIN tpch.partsupp AS partsupp - ON _s4.anything_s_suppkey = partsupp.ps_suppkey + ON _s4.ps_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part ON _s4.avg_p_retailprice > part.p_retailprice AND part.p_container = 'LG DRUM' diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index 10c89c472..f65eaa980 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -4,8 +4,8 @@ WITH _s0 AS ( FROM tpch.part ), _s6 AS ( SELECT + partsupp.ps_suppkey, MAX(_s0.avg_p_retailprice) AS anything_avg_p_retailprice, - MAX(supplier.s_suppkey) AS anything_s_suppkey, AVG(part.p_retailprice) AS supplier_avg_price FROM _s0 AS _s0 JOIN tpch.supplier AS supplier @@ -15,13 +15,13 @@ WITH _s0 AS ( JOIN tpch.part AS part ON part.p_partkey = partsupp.ps_partkey GROUP BY - partsupp.ps_suppkey + 1 ) SELECT - COUNT(DISTINCT _s6.anything_s_suppkey) AS n + COUNT(DISTINCT _s6.ps_suppkey) AS n FROM _s6 AS _s6 JOIN tpch.partsupp AS partsupp - ON _s6.anything_s_suppkey = partsupp.ps_suppkey + ON _s6.ps_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part ON _s6.supplier_avg_price > part.p_retailprice AND part.p_container = 'LG DRUM' diff --git a/tests/test_sql_refsols/correl_29_sqlite.sql b/tests/test_sql_refsols/correl_29_sqlite.sql index 8e3599f4b..ab54a3144 100644 --- a/tests/test_sql_refsols/correl_29_sqlite.sql +++ b/tests/test_sql_refsols/correl_29_sqlite.sql @@ -23,50 +23,50 @@ WITH _t5 AS ( ON _s1.avg_c_acctbal < _s3.c_acctbal AND _s3.c_nationkey = nation.n_nationkey GROUP BY 
1 -), _s10 AS ( +), _t6 AS ( + SELECT + s_acctbal, + s_nationkey + FROM tpch.supplier +), _s7 AS ( + SELECT + s_nationkey, + AVG(s_acctbal) AS avg_s_acctbal + FROM _t6 + GROUP BY + 1 +), _t1 AS ( SELECT + _s5.c_nationkey, MAX(_t3.anything_n_name) AS anything_anything_n_name, MAX(_t3.anything_n_regionkey) AS anything_anything_n_regionkey, - MAX(_t3.n_nationkey) AS anything_n_nationkey, MAX(_t3.n_rows) AS anything_n_rows, MAX(_s5.c_acctbal) AS max_c_acctbal, MIN(_s5.c_acctbal) AS min_c_acctbal FROM _t3 AS _t3 JOIN _t5 AS _s5 ON _s5.c_nationkey = _t3.n_nationkey + JOIN tpch.nation AS nation + ON _s5.c_nationkey = nation.n_nationkey + JOIN _s7 AS _s7 + ON _s7.s_nationkey = nation.n_nationkey + JOIN _t6 AS _s9 + ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey WHERE _t3.anything_n_regionkey IN (1, 3) - GROUP BY - _s5.c_nationkey -), _t6 AS ( - SELECT - s_acctbal, - s_nationkey - FROM tpch.supplier -), _s7 AS ( - SELECT - s_nationkey, - AVG(s_acctbal) AS avg_s_acctbal - FROM _t6 GROUP BY 1 ) SELECT - MAX(_s10.anything_anything_n_regionkey) AS region_key, - MAX(_s10.anything_anything_n_name) AS nation_name, - MAX(_s10.anything_n_rows) AS n_above_avg_customers, + MAX(anything_anything_n_regionkey) AS region_key, + MAX(anything_anything_n_name) AS nation_name, + MAX(anything_n_rows) AS n_above_avg_customers, COUNT(*) AS n_above_avg_suppliers, - MAX(_s10.min_c_acctbal) AS min_cust_acctbal, - MAX(_s10.max_c_acctbal) AS max_cust_acctbal -FROM _s10 AS _s10 -JOIN tpch.nation AS nation - ON _s10.anything_n_nationkey = nation.n_nationkey -JOIN _s7 AS _s7 - ON _s7.s_nationkey = nation.n_nationkey -JOIN _t6 AS _s9 - ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey + MAX(min_c_acctbal) AS min_cust_acctbal, + MAX(max_c_acctbal) AS max_cust_acctbal +FROM _t1 GROUP BY - nation.n_nationkey + c_nationkey ORDER BY 1, 2 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql 
b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql index 5e419b3c9..f3eae3bdb 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql @@ -1,9 +1,9 @@ SELECT - ANY_VALUE(users.uid) AS user_id, + wallet_transactions_daily.sender_id AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - wallet_transactions_daily.sender_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql index 5e419b3c9..f3eae3bdb 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_mysql.sql @@ -1,9 +1,9 @@ SELECT - ANY_VALUE(users.uid) AS user_id, + wallet_transactions_daily.sender_id AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - wallet_transactions_daily.sender_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql index b12f30f70..f3eae3bdb 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_postgres.sql @@ -1,9 +1,9 @@ SELECT - MAX(users.uid) AS user_id, + wallet_transactions_daily.sender_id AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - wallet_transactions_daily.sender_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql 
b/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql index 5e419b3c9..f3eae3bdb 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql @@ -1,9 +1,9 @@ SELECT - ANY_VALUE(users.uid) AS user_id, + wallet_transactions_daily.sender_id AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - wallet_transactions_daily.sender_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql index b12f30f70..f3eae3bdb 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql @@ -1,9 +1,9 @@ SELECT - MAX(users.uid) AS user_id, + wallet_transactions_daily.sender_id AS user_id, COUNT(*) AS total_transactions FROM main.users AS users JOIN main.wallet_transactions_daily AS wallet_transactions_daily ON users.uid = wallet_transactions_daily.sender_id AND wallet_transactions_daily.sender_type = 0 GROUP BY - wallet_transactions_daily.sender_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql index 54888423e..185e648da 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(users.uid) AS uid, + user_sessions.user_id AS uid, SUM( DATEDIFF( CAST(user_sessions.session_end_ts AS DATETIME), @@ -13,6 +13,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - user_sessions.user_id + 1 ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql index 
beecc5327..41b3428dd 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_mysql.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(users.uid) AS uid, + user_sessions.user_id AS uid, SUM( TIMESTAMPDIFF(SECOND, user_sessions.session_start_ts, user_sessions.session_end_ts) ) AS total_duration @@ -9,6 +9,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - user_sessions.user_id + 1 ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql index 03f38d12f..e123f50d6 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_postgres.sql @@ -1,5 +1,5 @@ SELECT - MAX(users.uid) AS uid, + user_sessions.user_id AS uid, SUM( EXTRACT(EPOCH FROM ( CAST(user_sessions.session_end_ts AS TIMESTAMP) - CAST(user_sessions.session_start_ts AS TIMESTAMP) @@ -11,6 +11,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - user_sessions.user_id + 1 ORDER BY 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql index ad121c7f4..7d8defbb6 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(users.uid) AS uid, + user_sessions.user_id AS uid, SUM( DATEDIFF( SECOND, @@ -13,6 +13,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - user_sessions.user_id + 1 ORDER BY 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql index 
f26571a07..508a4685b 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql @@ -1,5 +1,5 @@ SELECT - MAX(users.uid) AS uid, + user_sessions.user_id AS uid, SUM( ( ( @@ -15,6 +15,6 @@ JOIN main.user_sessions AS user_sessions AND user_sessions.session_start_ts >= '2023-06-01' AND user_sessions.user_id = users.uid GROUP BY - user_sessions.user_id + 1 ORDER BY 2 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql index de3b98e6b..1d96449b8 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_ansi.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(merchants.mid) AS merchants_id, + wallet_transactions_daily.receiver_id AS merchants_id, ANY_VALUE(merchants.name) AS merchants_name, ANY_VALUE(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - wallet_transactions_daily.receiver_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql index b341dbba8..8519016f7 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_mysql.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(merchants.mid) AS merchants_id, + wallet_transactions_daily.receiver_id AS merchants_id, ANY_VALUE(merchants.name) AS merchants_name, ANY_VALUE(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - wallet_transactions_daily.receiver_id + 1 
diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql index 4b1d90d67..33e2f6661 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_postgres.sql @@ -1,5 +1,5 @@ SELECT - MAX(merchants.mid) AS merchants_id, + wallet_transactions_daily.receiver_id AS merchants_id, MAX(merchants.name) AS merchants_name, MAX(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - wallet_transactions_daily.receiver_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql index b341dbba8..8519016f7 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql @@ -1,5 +1,5 @@ SELECT - ANY_VALUE(merchants.mid) AS merchants_id, + wallet_transactions_daily.receiver_id AS merchants_id, ANY_VALUE(merchants.name) AS merchants_name, ANY_VALUE(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - wallet_transactions_daily.receiver_id + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql index 4b1d90d67..33e2f6661 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv8_sqlite.sql @@ -1,5 +1,5 @@ SELECT - MAX(merchants.mid) AS merchants_id, + wallet_transactions_daily.receiver_id AS merchants_id, MAX(merchants.name) AS 
merchants_name, MAX(merchants.category) AS category, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, @@ -10,4 +10,4 @@ JOIN main.wallet_transactions_daily AS wallet_transactions_daily AND wallet_transactions_daily.receiver_type = 1 AND wallet_transactions_daily.status = 'success' GROUP BY - wallet_transactions_daily.receiver_id + 1 diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 1b5d6ec54..090832727 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - ANY_VALUE(part.p_partkey) AS anything_p_partkey, + lineitem.l_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE part.p_name LIKE 'forest%' GROUP BY - lineitem.l_partkey + 1 ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.anything_p_partkey = partsupp.ps_partkey + ON _s5.l_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_mysql.sql b/tests/test_sql_refsols/tpch_q20_mysql.sql index 284ef586d..fb35009f6 100644 --- a/tests/test_sql_refsols/tpch_q20_mysql.sql +++ b/tests/test_sql_refsols/tpch_q20_mysql.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - ANY_VALUE(PART.p_partkey) AS anything_p_partkey, + LINEITEM.l_partkey, SUM(LINEITEM.l_quantity) AS sum_l_quantity FROM tpch.PART AS PART JOIN tpch.LINEITEM AS LINEITEM @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE PART.p_name LIKE 'forest%' GROUP BY - LINEITEM.l_partkey + 1 ) SELECT ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, @@ -23,7 +23,7 @@ JOIN _s5 AS _s5 ON PARTSUPP.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) - AND PARTSUPP.ps_partkey = _s5.anything_p_partkey + AND 
PARTSUPP.ps_partkey = _s5.l_partkey GROUP BY PARTSUPP.ps_suppkey ORDER BY diff --git a/tests/test_sql_refsols/tpch_q20_postgres.sql b/tests/test_sql_refsols/tpch_q20_postgres.sql index 3f261a9bf..8da59668e 100644 --- a/tests/test_sql_refsols/tpch_q20_postgres.sql +++ b/tests/test_sql_refsols/tpch_q20_postgres.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - MAX(part.p_partkey) AS anything_p_partkey, + lineitem.l_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE part.p_name LIKE 'forest%' GROUP BY - lineitem.l_partkey + 1 ) SELECT MAX(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.anything_p_partkey = partsupp.ps_partkey + ON _s5.l_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_snowflake.sql b/tests/test_sql_refsols/tpch_q20_snowflake.sql index cc2d0c469..ec5447f59 100644 --- a/tests/test_sql_refsols/tpch_q20_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q20_snowflake.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - ANY_VALUE(part.p_partkey) AS anything_p_partkey, + lineitem.l_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE STARTSWITH(part.p_name, 'forest') GROUP BY - lineitem.l_partkey + 1 ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.anything_p_partkey = partsupp.ps_partkey + ON _s5.l_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index 46f3cd2d7..065135776 100644 --- 
a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -1,6 +1,6 @@ WITH _s5 AS ( SELECT - MAX(part.p_partkey) AS anything_p_partkey, + lineitem.l_partkey, SUM(lineitem.l_quantity) AS sum_l_quantity FROM tpch.part AS part JOIN tpch.lineitem AS lineitem @@ -9,7 +9,7 @@ WITH _s5 AS ( WHERE part.p_name LIKE 'forest%' GROUP BY - lineitem.l_partkey + 1 ) SELECT MAX(supplier.s_name) AS S_NAME, @@ -20,7 +20,7 @@ JOIN tpch.nation AS nation JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN _s5 AS _s5 - ON _s5.anything_p_partkey = partsupp.ps_partkey + ON _s5.l_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) From bd036f73ffe00c65f21f26837f3c13d01d8fe8dd Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 20:49:57 -0400 Subject: [PATCH 106/143] Removing plural case due to logical flaw --- .../conversion/join_aggregate_transpose.py | 10 +--- pydough/conversion/relational_converter.py | 4 +- tests/test_plan_refsols/correl_29.txt | 18 +++---- .../multi_partition_access_2.txt | 34 ++++++------ .../multi_partition_access_3.txt | 14 ++--- .../multi_partition_access_4.txt | 10 ++-- tests/test_sql_refsols/correl_29_sqlite.sql | 52 +++++++++---------- 7 files changed, 68 insertions(+), 74 deletions(-) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index 2e89faed4..8b66c4865 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -93,13 +93,6 @@ def join_aggregate_transpose( The new RelationalNode tree with the Join and Aggregate transposed, or None if the transpose is not possible. """ - # Verify that the join is an inner, left, or semi-join, and that the - # join cardinality is singular (unless the aggregations are not affected - # by a change in cardinality). 
- aggs_allow_plural: bool = all( - call.op in (pydop.MIN, pydop.MAX, pydop.ANYTHING, pydop.NDISTINCT) - for call in aggregate.aggregations.values() - ) # The cardinality with regards to the input being considered must be # singular (unless the aggregations allow plural), and must be @@ -118,7 +111,7 @@ def join_aggregate_transpose( or (join.join_type == JoinType.SEMI and is_left) ) and cardinality.filters - and (cardinality.singular or aggs_allow_plural) + and cardinality.singular ): return None @@ -249,7 +242,6 @@ def join_aggregate_transpose( # TODO ADD COMMENTS new_project: Project = Project(new_aggregate, new_project_columns) - return new_project diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 3db00b942..75f9976d2 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1591,7 +1591,8 @@ def optimize_relational_tree( # B: expression simplification # C: filter pushdown # D: join-aggregate transpose - # E: column pruning + # E: redundant aggregation removal + # F: column pruning # This is done because pullup will create more opportunities for expression # simplification, which will allow more filters to be pushed further down, # and the combination of those together will create more opportunities for @@ -1602,6 +1603,7 @@ def optimize_relational_tree( simplify_expressions(root, session, additional_shuttles) root = confirm_root(push_filters(root, session)) root = confirm_root(pull_aggregates_above_joins(root)) + root = remove_redundant_aggs(root) root = pruner.prune_unused_columns(root) # Re-run projection merging, without pushing into joins. 
This will allow diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 36c3bd12a..72f58693b 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_name', anything_anything_anything_n_name), ('n_above_avg_customers', anything_anything_n_rows), ('n_above_avg_suppliers', agg_3_14), ('min_cust_acctbal', anything_min_c_acctbal), ('max_cust_acctbal', anything_max_c_acctbal)], orderings=[(anything_anything_anything_n_regionkey):asc_first, (anything_anything_anything_n_name):asc_first]) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t0.c_acctbal, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 
'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'anything_anything_n_regionkey': t0.anything_anything_n_regionkey, 'anything_n_rows': t0.anything_n_rows, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_nationkey': t1.n_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) @@ -12,9 +12,9 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_n SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 11e969ab7..132528e7a 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,30 +1,30 @@ -ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', sum_anything_sum_sbTxShares / sum_anything_count_sbTxShares), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) - JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_anything_sum_sbTxShares / t0.sum_anything_count_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 
'sum_anything_count_sbTxShares': t0.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t0.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_anything_count_sbTxShares': t1.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t1.sum_anything_sum_sbTxShares, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares}) +ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', sum_sum_sbTxShares_1 / sum_count_sbTxShares_1), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & 
t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares_1 / t0.sum_count_sbTxShares_1 & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t1.sum_sum_sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, 
aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.anything_sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'anything_sbTxType': ANYTHING(sbTxType)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, 
columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_anything_count_sbTxShares': t1.sum_anything_count_sbTxShares, 'sum_anything_sum_sbTxShares': t1.sum_anything_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_anything_count_sbTxShares': SUM(anything_count_sbTxShares), 'sum_anything_sum_sbTxShares': SUM(anything_sum_sbTxShares)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'anything_count_sbTxShares': ANYTHING(count_sbTxShares), 'anything_sum_sbTxShares': ANYTHING(sum_sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': 
t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_sbTxShares': t1.sum_sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_sbTxShares': t1.sum_sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'avg_sbTxShares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git 
a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index bcd3fedcb..3203eb669 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,12 +1,12 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) - JOIN(condition=t1.sbDpClose < t0.max_anything_sbDpClose & t0.anything_sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) - AGGREGATE(keys={'anything_sbTickerType': anything_sbTickerType}, aggregations={'max_anything_sbDpClose': MAX(anything_sbDpClose)}) - AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'anything_sbDpClose': ANYTHING(sbDpClose), 'anything_sbTickerType': ANYTHING(sbTickerType)}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t0.sbDpTickerId, 'sbTickerType': t1.sbTickerType}) + JOIN(condition=t1.sbDpClose < t0.max_sbDpClose & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) + AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'max_sbDpClose': MAX(sbDpClose)}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) + AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 
'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) - SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) + SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.max_sbDpClose, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'max_sbDpClose': MAX(sbDpClose)}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index c284ce3c8..f7be3c1ff 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.anything_max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'anything_max_sbTxShares': ANYTHING(max_sbTxShares), 'anything_sbTxTickerId': ANYTHING(sbTxTickerId), 'max_max_sbTxShares': MAX(sbTxShares)}) - 
JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t0.sbTxShares, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'max_max_sbTxShares': t0.max_sbTxShares, 'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_sql_refsols/correl_29_sqlite.sql b/tests/test_sql_refsols/correl_29_sqlite.sql index ab54a3144..5ec76c6d8 100644 --- a/tests/test_sql_refsols/correl_29_sqlite.sql +++ b/tests/test_sql_refsols/correl_29_sqlite.sql @@ -23,19 +23,7 @@ WITH _t5 AS ( ON _s1.avg_c_acctbal < _s3.c_acctbal AND _s3.c_nationkey = nation.n_nationkey GROUP BY 1 -), _t6 
AS ( - SELECT - s_acctbal, - s_nationkey - FROM tpch.supplier -), _s7 AS ( - SELECT - s_nationkey, - AVG(s_acctbal) AS avg_s_acctbal - FROM _t6 - GROUP BY - 1 -), _t1 AS ( +), _s10 AS ( SELECT _s5.c_nationkey, MAX(_t3.anything_n_name) AS anything_anything_n_name, @@ -46,27 +34,39 @@ WITH _t5 AS ( FROM _t3 AS _t3 JOIN _t5 AS _s5 ON _s5.c_nationkey = _t3.n_nationkey - JOIN tpch.nation AS nation - ON _s5.c_nationkey = nation.n_nationkey - JOIN _s7 AS _s7 - ON _s7.s_nationkey = nation.n_nationkey - JOIN _t6 AS _s9 - ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey WHERE _t3.anything_n_regionkey IN (1, 3) GROUP BY 1 +), _t6 AS ( + SELECT + s_acctbal, + s_nationkey + FROM tpch.supplier +), _s7 AS ( + SELECT + s_nationkey, + AVG(s_acctbal) AS avg_s_acctbal + FROM _t6 + GROUP BY + 1 ) SELECT - MAX(anything_anything_n_regionkey) AS region_key, - MAX(anything_anything_n_name) AS nation_name, - MAX(anything_n_rows) AS n_above_avg_customers, + MAX(_s10.anything_anything_n_regionkey) AS region_key, + MAX(_s10.anything_anything_n_name) AS nation_name, + MAX(_s10.anything_n_rows) AS n_above_avg_customers, COUNT(*) AS n_above_avg_suppliers, - MAX(min_c_acctbal) AS min_cust_acctbal, - MAX(max_c_acctbal) AS max_cust_acctbal -FROM _t1 + MAX(_s10.min_c_acctbal) AS min_cust_acctbal, + MAX(_s10.max_c_acctbal) AS max_cust_acctbal +FROM _s10 AS _s10 +JOIN tpch.nation AS nation + ON _s10.c_nationkey = nation.n_nationkey +JOIN _s7 AS _s7 + ON _s7.s_nationkey = nation.n_nationkey +JOIN _t6 AS _s9 + ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey GROUP BY - c_nationkey + nation.n_nationkey ORDER BY 1, 2 From 7d8ac06537bfc3323797b60b90889abe0ce92e0b Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 20:50:08 -0400 Subject: [PATCH 107/143] [RUN CI] From 036230ad33118c4ad00b029e43cf958fd8c6c145 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 22:25:33 -0400 Subject: [PATCH 108/143] Patch to make 
common_prefix_al faster due to sqlite issues [RUN CI] --- tests/test_plan_refsols/common_prefix_al.txt | 6 +++--- .../common_prefix_pydough_functions.py | 21 ++++++++++++++++++- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 77788a35d..13a125136 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -11,12 +11,12 @@ ROOT(columns=[('cust_key', o_custkey), ('n_orders', DEFAULT_TO(anything_anything SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric & ISIN(l_partkey, [53360, 123069, 132776, 62217, 67393, 87784, 148252, 176947, 196620, 103099, 169275]:array[unknown]), columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + 
FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric & ISIN(l_partkey, [53360, 123069, 132776, 62217, 67393, 87784, 148252, 176947, 196620, 103099, 169275]:array[unknown]), columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_pydough_functions/common_prefix_pydough_functions.py b/tests/test_pydough_functions/common_prefix_pydough_functions.py index 979381fc2..9dab96308 100644 --- a/tests/test_pydough_functions/common_prefix_pydough_functions.py +++ b/tests/test_pydough_functions/common_prefix_pydough_functions.py @@ -698,7 +698,26 @@ def common_prefix_al(): # For each remaining customer, list their key, number of orders made, and # number of lineitems without tax/discount made. When choosing the top 10 # customers, pick the 10 with the lowest key values. 
- selected_lines = orders.lines.WHERE((tax == 0) & (discount == 0)) + selected_lines = orders.lines.WHERE( + (tax == 0) + & (discount == 0) + & ISIN( + part_key, + ( + 53360, + 123069, + 132776, + 62217, + 67393, + 87784, + 148252, + 176947, + 196620, + 103099, + 169275, + ), + ) + ) selected_part_purchase = selected_lines.part.WHERE(size < 15) return ( nations.customers.CALCULATE(n_orders=COUNT(orders)) From 712d04587baa45e6485f6cb582085ee16cbca44f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 17 Oct 2025 22:41:34 -0400 Subject: [PATCH 109/143] Minor updates to the defog graph --- tests/test_metadata/defog_graphs.json | 22 ++++++------ .../defog_dealership_adv12_ansi.sql | 34 ++++++------------- .../defog_dealership_adv12_mysql.sql | 34 ++++++------------- .../defog_dealership_adv12_postgres.sql | 34 ++++++------------- .../defog_dealership_adv12_snowflake.sql | 34 ++++++------------- .../defog_dealership_adv12_sqlite.sql | 34 ++++++------------- 6 files changed, 61 insertions(+), 131 deletions(-) diff --git a/tests/test_metadata/defog_graphs.json b/tests/test_metadata/defog_graphs.json index 09c2860db..bab438ab9 100644 --- a/tests/test_metadata/defog_graphs.json +++ b/tests/test_metadata/defog_graphs.json @@ -7,7 +7,7 @@ "name": "customers", "type": "simple table", "table path": "main.sbCustomer", - "unique properties": ["_id"], + "unique properties": ["_id", "name", "email", "address1"], "properties": [ { "name": "_id", @@ -620,7 +620,7 @@ "name": "salespeople", "type": "simple table", "table path": "main.salespersons", - "unique properties": ["_id", "email"], + "unique properties": ["_id", "email", "phone", ["first_name", "last_name"]], "properties": [ { "name": "_id", @@ -698,7 +698,7 @@ "name": "customers", "type": "simple table", "table path": "main.customers", - "unique properties": ["_id", "email"], + "unique properties": ["_id", "email", "phone", "address", ["first_name", "last_name"]], "properties": [ { "name": "_id", @@ -1013,7 +1013,7 @@ 
"name": "inventory_snapshots", "type": "simple table", "table path": "main.inventory_snapshots", - "unique properties": ["_id"], + "unique properties": ["_id", ["car_id", "snapshot_date"]], "properties": [ { "name": "_id", @@ -1201,7 +1201,7 @@ "name": "doctors", "type": "simple table", "table path": "main.doctors", - "unique properties": ["doc_id"], + "unique properties": ["doc_id", "first_name", "last_name", "board_certification_number"], "properties": [ { "name": "doc_id", @@ -1300,7 +1300,7 @@ "name": "patients", "type": "simple table", "table path": "main.patients", - "unique properties": ["patient_id", "email", "phone"], + "unique properties": ["patient_id", "email", "phone", "first_name"], "properties": [ { "name": "patient_id", @@ -1374,7 +1374,7 @@ { "name": "street_address", "type": "table column", - "column name": "addr_city", + "column name": "addr_street", "data type": "string", "description": "The street address of the patient's home, including street name and number", "sample values": ["987 Birch Dr", "753 Walnut Ave", "951 Spruce Blvd"], @@ -1451,7 +1451,7 @@ "name": "drugs", "type": "simple table", "table path": "main.drugs", - "unique properties": ["drug_id", "national_drug_code"], + "unique properties": ["drug_id", "national_drug_code", "drug_name"], "properties": [ { "name": "drug_id", @@ -1559,7 +1559,7 @@ "name": "diagnoses", "type": "simple table", "table path": "main.diagnoses", - "unique properties": ["_id", "code", "name"], + "unique properties": ["_id", "code", "name", "description"], "properties": [ { "name": "_id", @@ -1702,7 +1702,7 @@ "name": "outcomes", "type": "simple table", "table path": "main.outcomes", - "unique properties": ["outcome_id"], + "unique properties": ["outcome_id", ["treatment_id", "assessment_date"]], "properties": [ { "name": "outcome_id", @@ -1953,7 +1953,7 @@ "name": "adverse_events", "type": "simple table", "table path": "main.adverse_events", - "unique properties": ["_id"], + "unique properties": ["_id", 
"treatment_id"], "properties": [ { "name": "_id", diff --git a/tests/test_sql_refsols/defog_dealership_adv12_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv12_ansi.sql index 7eb2d10dc..ab2c3cee5 100644 --- a/tests/test_sql_refsols/defog_dealership_adv12_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv12_ansi.sql @@ -1,28 +1,14 @@ -WITH _t1 AS ( - SELECT - cars._id AS _id_1, - ANY_VALUE(sales.car_id) AS anything_car_id, - ANY_VALUE(cars.make) AS anything_make, - ANY_VALUE(cars.model) AS anything_model, - ANY_VALUE(sales.sale_price) AS anything_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id - JOIN main.inventory_snapshots AS inventory_snapshots - ON NOT inventory_snapshots.is_in_inventory - AND cars._id = inventory_snapshots.car_id - AND inventory_snapshots.snapshot_date = sales.sale_date - GROUP BY - sales._id, - 1 -) SELECT - anything_make AS make, - anything_model AS model, - anything_sale_price AS sale_price -FROM _t1 -WHERE - _id_1 = anything_car_id + cars.make, + cars.model, + sales.sale_price +FROM main.sales AS sales +JOIN main.cars AS cars + ON cars._id = sales.car_id +JOIN main.inventory_snapshots AS inventory_snapshots + ON NOT inventory_snapshots.is_in_inventory + AND cars._id = inventory_snapshots.car_id + AND inventory_snapshots.snapshot_date = sales.sale_date ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv12_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv12_mysql.sql index 7eb2d10dc..ab2c3cee5 100644 --- a/tests/test_sql_refsols/defog_dealership_adv12_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv12_mysql.sql @@ -1,28 +1,14 @@ -WITH _t1 AS ( - SELECT - cars._id AS _id_1, - ANY_VALUE(sales.car_id) AS anything_car_id, - ANY_VALUE(cars.make) AS anything_make, - ANY_VALUE(cars.model) AS anything_model, - ANY_VALUE(sales.sale_price) AS anything_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id - JOIN 
main.inventory_snapshots AS inventory_snapshots - ON NOT inventory_snapshots.is_in_inventory - AND cars._id = inventory_snapshots.car_id - AND inventory_snapshots.snapshot_date = sales.sale_date - GROUP BY - sales._id, - 1 -) SELECT - anything_make AS make, - anything_model AS model, - anything_sale_price AS sale_price -FROM _t1 -WHERE - _id_1 = anything_car_id + cars.make, + cars.model, + sales.sale_price +FROM main.sales AS sales +JOIN main.cars AS cars + ON cars._id = sales.car_id +JOIN main.inventory_snapshots AS inventory_snapshots + ON NOT inventory_snapshots.is_in_inventory + AND cars._id = inventory_snapshots.car_id + AND inventory_snapshots.snapshot_date = sales.sale_date ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv12_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv12_postgres.sql index 5338986f0..f8f74f3ce 100644 --- a/tests/test_sql_refsols/defog_dealership_adv12_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv12_postgres.sql @@ -1,28 +1,14 @@ -WITH _t1 AS ( - SELECT - cars._id AS _id_1, - MAX(sales.car_id) AS anything_car_id, - MAX(cars.make) AS anything_make, - MAX(cars.model) AS anything_model, - MAX(sales.sale_price) AS anything_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id - JOIN main.inventory_snapshots AS inventory_snapshots - ON NOT inventory_snapshots.is_in_inventory - AND cars._id = inventory_snapshots.car_id - AND inventory_snapshots.snapshot_date = sales.sale_date - GROUP BY - sales._id, - 1 -) SELECT - anything_make AS make, - anything_model AS model, - anything_sale_price AS sale_price -FROM _t1 -WHERE - _id_1 = anything_car_id + cars.make, + cars.model, + sales.sale_price +FROM main.sales AS sales +JOIN main.cars AS cars + ON cars._id = sales.car_id +JOIN main.inventory_snapshots AS inventory_snapshots + ON NOT inventory_snapshots.is_in_inventory + AND cars._id = inventory_snapshots.car_id + AND inventory_snapshots.snapshot_date = 
sales.sale_date ORDER BY 3 DESC NULLS LAST LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv12_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv12_snowflake.sql index b0d7fdefb..f8f74f3ce 100644 --- a/tests/test_sql_refsols/defog_dealership_adv12_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv12_snowflake.sql @@ -1,28 +1,14 @@ -WITH _t1 AS ( - SELECT - cars._id AS _id_1, - ANY_VALUE(sales.car_id) AS anything_car_id, - ANY_VALUE(cars.make) AS anything_make, - ANY_VALUE(cars.model) AS anything_model, - ANY_VALUE(sales.sale_price) AS anything_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id - JOIN main.inventory_snapshots AS inventory_snapshots - ON NOT inventory_snapshots.is_in_inventory - AND cars._id = inventory_snapshots.car_id - AND inventory_snapshots.snapshot_date = sales.sale_date - GROUP BY - sales._id, - 1 -) SELECT - anything_make AS make, - anything_model AS model, - anything_sale_price AS sale_price -FROM _t1 -WHERE - _id_1 = anything_car_id + cars.make, + cars.model, + sales.sale_price +FROM main.sales AS sales +JOIN main.cars AS cars + ON cars._id = sales.car_id +JOIN main.inventory_snapshots AS inventory_snapshots + ON NOT inventory_snapshots.is_in_inventory + AND cars._id = inventory_snapshots.car_id + AND inventory_snapshots.snapshot_date = sales.sale_date ORDER BY 3 DESC NULLS LAST LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv12_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv12_sqlite.sql index 8972af49b..ab2c3cee5 100644 --- a/tests/test_sql_refsols/defog_dealership_adv12_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv12_sqlite.sql @@ -1,28 +1,14 @@ -WITH _t1 AS ( - SELECT - cars._id AS _id_1, - MAX(sales.car_id) AS anything_car_id, - MAX(cars.make) AS anything_make, - MAX(cars.model) AS anything_model, - MAX(sales.sale_price) AS anything_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = 
sales.car_id - JOIN main.inventory_snapshots AS inventory_snapshots - ON NOT inventory_snapshots.is_in_inventory - AND cars._id = inventory_snapshots.car_id - AND inventory_snapshots.snapshot_date = sales.sale_date - GROUP BY - sales._id, - 1 -) SELECT - anything_make AS make, - anything_model AS model, - anything_sale_price AS sale_price -FROM _t1 -WHERE - _id_1 = anything_car_id + cars.make, + cars.model, + sales.sale_price +FROM main.sales AS sales +JOIN main.cars AS cars + ON cars._id = sales.car_id +JOIN main.inventory_snapshots AS inventory_snapshots + ON NOT inventory_snapshots.is_in_inventory + AND cars._id = inventory_snapshots.car_id + AND inventory_snapshots.snapshot_date = sales.sale_date ORDER BY 3 DESC LIMIT 1 From 1b43fbd8f806fc906f48eae76195095b010ffe47 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 20 Oct 2025 14:57:01 -0400 Subject: [PATCH 110/143] Adding start of the LEFT case, minus COUNT(*) --- .../conversion/join_aggregate_transpose.py | 27 ++++ .../customer_largest_order_deltas.txt | 16 +-- .../quantile_function_test_2.txt | 14 +-- .../quantile_function_test_3.txt | 14 +-- .../quantile_function_test_4.txt | 14 +-- .../supplier_pct_national_qty.txt | 20 +-- .../window_filter_order_9.txt | 12 +- .../conditional_functions_ansi.sql | 22 ++-- .../conditional_functions_mysql.sql | 22 ++-- .../conditional_functions_postgres.sql | 18 +-- .../conditional_functions_snowflake.sql | 22 ++-- .../conditional_functions_sqlite.sql | 18 +-- .../defog_dealership_adv6_ansi.sql | 18 +-- .../defog_dealership_adv6_mysql.sql | 12 +- .../defog_dealership_adv6_postgres.sql | 12 +- .../defog_dealership_adv6_snowflake.sql | 18 +-- .../defog_dealership_adv6_sqlite.sql | 12 +- .../defog_dermtreatment_adv5_ansi.sql | 16 ++- .../defog_dermtreatment_adv5_mysql.sql | 16 ++- .../defog_dermtreatment_adv5_postgres.sql | 16 ++- .../defog_dermtreatment_adv5_snowflake.sql | 16 ++- .../defog_dermtreatment_adv5_sqlite.sql | 16 ++- .../defog_ewallet_adv12_ansi.sql | 12 +- 
.../defog_ewallet_adv12_mysql.sql | 12 +- .../defog_ewallet_adv12_postgres.sql | 12 +- .../defog_ewallet_adv12_snowflake.sql | 12 +- .../defog_ewallet_adv12_sqlite.sql | 12 +- .../defog_ewallet_gen4_ansi.sql | 39 +++--- .../defog_ewallet_gen4_mysql.sql | 39 +++--- .../defog_ewallet_gen4_postgres.sql | 39 +++--- .../defog_ewallet_gen4_snowflake.sql | 39 +++--- .../defog_ewallet_gen4_sqlite.sql | 39 +++--- .../test_sql_refsols/quantile_test_2_ansi.sql | 54 ++++---- .../quantile_test_2_mysql.sql | 117 ++++++++---------- .../quantile_test_2_postgres.sql | 54 ++++---- .../quantile_test_2_snowflake.sql | 54 ++++---- .../quantile_test_2_sqlite.sql | 100 +++++++-------- 37 files changed, 530 insertions(+), 475 deletions(-) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index 8b66c4865..9b81026be 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -32,6 +32,23 @@ class JoinAggregateTransposeShuttle(RelationalShuttle): TODO """ + left_join_case_ops = { + # pydop.COUNT, + pydop.MIN, + pydop.MAX, + pydop.SUM, + pydop.ANYTHING, + pydop.MEDIAN, + pydop.QUANTILE, + pydop.SAMPLE_VAR, + pydop.SAMPLE_STD, + pydop.POPULATION_VAR, + pydop.POPULATION_STD, + } + """ + TODO: add description + """ + def __init__(self): self.finder: ColumnReferenceFinder = ColumnReferenceFinder() @@ -102,6 +119,15 @@ def join_aggregate_transpose( join.cardinality if is_left else join.reverse_cardinality ) + left_join_case = ( + join.join_type == JoinType.LEFT + and not is_left + and all( + agg.op in JoinAggregateTransposeShuttle.left_join_case_ops + for agg in aggregate.aggregations.values() + ) + ) + # Verify the cardinality meets the specified criteria, and that the join # type is INNER/SEMI (since LEFT would not be filtering), where SEMI is # only allowed if the aggregation is on the left. 
@@ -109,6 +135,7 @@ def join_aggregate_transpose( ( (join.join_type == JoinType.INNER) or (join.join_type == JoinType.SEMI and is_left) + or left_join_case ) and cardinality.filters and cardinality.singular diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 30ca37e9a..0105b05f2 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,13 +1,13 @@ ROOT(columns=[('name', anything_c_name), ('largest_diff', IFF(ABS(min_revenue_delta) > max_revenue_delta, min_revenue_delta, max_revenue_delta))], orderings=[(IFF(ABS(min_revenue_delta) > max_revenue_delta, min_revenue_delta, max_revenue_delta)):desc_last], limit=5:numeric) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'max_revenue_delta': MAX(revenue_delta), 'min_revenue_delta': MIN(revenue_delta)}) - PROJECT(columns={'c_name': c_name, 'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'sum_r': t1.sum_r}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'max_revenue_delta': MAX(revenue_delta), 'min_revenue_delta': MIN(revenue_delta)}) + PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'c_name': c_name, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[anything_o_custkey], order=[(anything_o_orderdate):asc_last])}) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'anything_o_custkey': t1.anything_o_custkey, 'anything_o_orderdate': t1.anything_o_orderdate, 'c_name': t0.c_name, 'sum_r': t1.sum_r}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) - FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) + FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[anything_o_custkey], order=[(anything_o_orderdate):asc_last])), columns={'anything_o_custkey': anything_o_custkey, 'anything_o_orderdate': anything_o_orderdate, 'sum_r': sum_r}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'anything_o_orderdate': ANYTHING(o_orderdate), 'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_shipmode == 'AIR':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) diff --git a/tests/test_plan_refsols/quantile_function_test_2.txt b/tests/test_plan_refsols/quantile_function_test_2.txt index 39d7a7218..cf70ffe8c 100644 --- a/tests/test_plan_refsols/quantile_function_test_2.txt +++ b/tests/test_plan_refsols/quantile_function_test_2.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - 
SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) +ROOT(columns=[('region_name', anything_r_name), ('nation_name', anything_n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(anything_n_name):asc_first]) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name, 'o_totalprice': t1.o_totalprice, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + LIMIT(limit=5:numeric, columns={'n_name': n_name, 
'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/quantile_function_test_3.txt b/tests/test_plan_refsols/quantile_function_test_3.txt index 39d7a7218..cf70ffe8c 100644 --- a/tests/test_plan_refsols/quantile_function_test_3.txt +++ b/tests/test_plan_refsols/quantile_function_test_3.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, 
orderings=[(n_name):asc_first]) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) +ROOT(columns=[('region_name', anything_r_name), ('nation_name', anything_n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(anything_n_name):asc_first]) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name, 'o_totalprice': t1.o_totalprice, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/quantile_function_test_4.txt b/tests/test_plan_refsols/quantile_function_test_4.txt index 1fe7deeea..453042d6a 100644 --- a/tests/test_plan_refsols/quantile_function_test_4.txt +++ b/tests/test_plan_refsols/quantile_function_test_4.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) +ROOT(columns=[('region_name', anything_r_name), ('nation_name', anything_n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(anything_n_name):asc_first]) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name, 'o_totalprice': 
t1.o_totalprice, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index cf94b18f6..1f1fd0b2e 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,13 +1,13 @@ -ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]))], orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[])):desc_last], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': 
t1.sum_l_quantity}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=s_acctbal >= 0.0:numeric & CONTAINS(s_comment, 'careful':string), columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) +ROOT(columns=[('supplier_name', anything_s_name), ('nation_name', anything_n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[anything_s_nationkey], order=[]))], orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[anything_s_nationkey], order=[])):desc_last], limit=5:numeric) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_s_name': ANYTHING(s_name), 'anything_s_nationkey': ANYTHING(s_nationkey), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 
'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=s_acctbal >= 0.0:numeric & CONTAINS(s_comment, 'careful':string), columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) FILTER(condition=YEAR(l_shipdate) == 1995:numeric & l_shipmode == 'SHIP':string, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/window_filter_order_9.txt b/tests/test_plan_refsols/window_filter_order_9.txt index 6c9e87b17..2b6b54e83 100644 --- a/tests/test_plan_refsols/window_filter_order_9.txt +++ b/tests/test_plan_refsols/window_filter_order_9.txt @@ -1,12 +1,12 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) FILTER(condition=ABSENT(expr_0) & o_totalprice < 0.05:numeric * 
RELAVG(args=[total_spent], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'expr_0': t1.expr_0, 'o_totalprice': t0.o_totalprice, 'total_spent': t1.total_spent}) + JOIN(condition=t0.o_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'expr_0': t1.expr_0, 'o_totalprice': t0.o_totalprice, 'total_spent': t1.total_spent}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - PROJECT(columns={'c_custkey': c_custkey, 'expr_0': 1:numeric, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_o_totalprice': t1.sum_o_totalprice}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'expr_0': 1:numeric, 'o_custkey': o_custkey, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'o_totalprice': t1.o_totalprice}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_totalprice': 
o_totalprice}) diff --git a/tests/test_sql_refsols/conditional_functions_ansi.sql b/tests/test_sql_refsols/conditional_functions_ansi.sql index 6e09fcd6f..a2358fbe5 100644 --- a/tests/test_sql_refsols/conditional_functions_ansi.sql +++ b/tests/test_sql_refsols/conditional_functions_ansi.sql @@ -1,20 +1,24 @@ WITH _s1 AS ( SELECT o_custkey, - MIN(o_totalprice) AS min_o_totalprice + o_totalprice FROM tpch.orders - GROUP BY - 1 ) SELECT - CASE WHEN customer.c_acctbal > 1000 THEN 'High' ELSE 'Low' END AS iff_col, - customer.c_name IN ('Alice', 'Bob', 'Charlie') AS isin_col, - COALESCE(_s1.min_o_totalprice, 0.0) AS default_val, - NOT _s1.min_o_totalprice IS NULL AS has_acct_bal, - _s1.min_o_totalprice IS NULL AS no_acct_bal, - CASE WHEN customer.c_acctbal > 0 THEN customer.c_acctbal ELSE NULL END AS no_debt_bal + CASE WHEN ANY_VALUE(customer.c_acctbal) > 1000 THEN 'High' ELSE 'Low' END AS iff_col, + ANY_VALUE(customer.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, + COALESCE(MIN(_s1.o_totalprice), 0.0) AS default_val, + NOT MIN(_s1.o_totalprice) IS NULL AS has_acct_bal, + MIN(_s1.o_totalprice) IS NULL AS no_acct_bal, + CASE + WHEN ANY_VALUE(customer.c_acctbal) > 0 + THEN ANY_VALUE(customer.c_acctbal) + ELSE NULL + END AS no_debt_bal FROM tpch.customer AS customer LEFT JOIN _s1 AS _s1 ON _s1.o_custkey = customer.c_custkey WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 +GROUP BY + _s1.o_custkey diff --git a/tests/test_sql_refsols/conditional_functions_mysql.sql b/tests/test_sql_refsols/conditional_functions_mysql.sql index 76aa8fcd2..664859f62 100644 --- a/tests/test_sql_refsols/conditional_functions_mysql.sql +++ b/tests/test_sql_refsols/conditional_functions_mysql.sql @@ -1,20 +1,24 @@ WITH _s1 AS ( SELECT o_custkey, - MIN(o_totalprice) AS min_o_totalprice + o_totalprice FROM tpch.ORDERS - GROUP BY - 1 ) SELECT - CASE WHEN CUSTOMER.c_acctbal > 1000 THEN 'High' ELSE 'Low' END AS iff_col, - CUSTOMER.c_name IN ('Alice', 'Bob', 'Charlie') AS isin_col, - 
COALESCE(_s1.min_o_totalprice, 0.0) AS default_val, - NOT _s1.min_o_totalprice IS NULL AS has_acct_bal, - _s1.min_o_totalprice IS NULL AS no_acct_bal, - CASE WHEN CUSTOMER.c_acctbal > 0 THEN CUSTOMER.c_acctbal ELSE NULL END AS no_debt_bal + CASE WHEN ANY_VALUE(CUSTOMER.c_acctbal) > 1000 THEN 'High' ELSE 'Low' END AS iff_col, + ANY_VALUE(CUSTOMER.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, + COALESCE(MIN(_s1.o_totalprice), 0.0) AS default_val, + NOT MIN(_s1.o_totalprice) IS NULL AS has_acct_bal, + MIN(_s1.o_totalprice) IS NULL AS no_acct_bal, + CASE + WHEN ANY_VALUE(CUSTOMER.c_acctbal) > 0 + THEN ANY_VALUE(CUSTOMER.c_acctbal) + ELSE NULL + END AS no_debt_bal FROM tpch.CUSTOMER AS CUSTOMER LEFT JOIN _s1 AS _s1 ON CUSTOMER.c_custkey = _s1.o_custkey WHERE CUSTOMER.c_acctbal <= 1000 AND CUSTOMER.c_acctbal >= 100 +GROUP BY + _s1.o_custkey diff --git a/tests/test_sql_refsols/conditional_functions_postgres.sql b/tests/test_sql_refsols/conditional_functions_postgres.sql index 6e09fcd6f..2b912c9d7 100644 --- a/tests/test_sql_refsols/conditional_functions_postgres.sql +++ b/tests/test_sql_refsols/conditional_functions_postgres.sql @@ -1,20 +1,20 @@ WITH _s1 AS ( SELECT o_custkey, - MIN(o_totalprice) AS min_o_totalprice + o_totalprice FROM tpch.orders - GROUP BY - 1 ) SELECT - CASE WHEN customer.c_acctbal > 1000 THEN 'High' ELSE 'Low' END AS iff_col, - customer.c_name IN ('Alice', 'Bob', 'Charlie') AS isin_col, - COALESCE(_s1.min_o_totalprice, 0.0) AS default_val, - NOT _s1.min_o_totalprice IS NULL AS has_acct_bal, - _s1.min_o_totalprice IS NULL AS no_acct_bal, - CASE WHEN customer.c_acctbal > 0 THEN customer.c_acctbal ELSE NULL END AS no_debt_bal + CASE WHEN MAX(customer.c_acctbal) > 1000 THEN 'High' ELSE 'Low' END AS iff_col, + MAX(customer.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, + COALESCE(MIN(_s1.o_totalprice), 0.0) AS default_val, + NOT MIN(_s1.o_totalprice) IS NULL AS has_acct_bal, + MIN(_s1.o_totalprice) IS NULL AS no_acct_bal, + CASE WHEN 
MAX(customer.c_acctbal) > 0 THEN MAX(customer.c_acctbal) ELSE NULL END AS no_debt_bal FROM tpch.customer AS customer LEFT JOIN _s1 AS _s1 ON _s1.o_custkey = customer.c_custkey WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 +GROUP BY + _s1.o_custkey diff --git a/tests/test_sql_refsols/conditional_functions_snowflake.sql b/tests/test_sql_refsols/conditional_functions_snowflake.sql index 1b93205be..8b6df1849 100644 --- a/tests/test_sql_refsols/conditional_functions_snowflake.sql +++ b/tests/test_sql_refsols/conditional_functions_snowflake.sql @@ -1,20 +1,24 @@ WITH _s1 AS ( SELECT o_custkey, - MIN(o_totalprice) AS min_o_totalprice + o_totalprice FROM tpch.orders - GROUP BY - 1 ) SELECT - IFF(customer.c_acctbal > 1000, 'High', 'Low') AS iff_col, - customer.c_name IN ('Alice', 'Bob', 'Charlie') AS isin_col, - COALESCE(_s1.min_o_totalprice, 0.0) AS default_val, - NOT _s1.min_o_totalprice IS NULL AS has_acct_bal, - _s1.min_o_totalprice IS NULL AS no_acct_bal, - CASE WHEN customer.c_acctbal > 0 THEN customer.c_acctbal ELSE NULL END AS no_debt_bal + IFF(ANY_VALUE(customer.c_acctbal) > 1000, 'High', 'Low') AS iff_col, + ANY_VALUE(customer.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, + COALESCE(MIN(_s1.o_totalprice), 0.0) AS default_val, + NOT MIN(_s1.o_totalprice) IS NULL AS has_acct_bal, + MIN(_s1.o_totalprice) IS NULL AS no_acct_bal, + CASE + WHEN ANY_VALUE(customer.c_acctbal) > 0 + THEN ANY_VALUE(customer.c_acctbal) + ELSE NULL + END AS no_debt_bal FROM tpch.customer AS customer LEFT JOIN _s1 AS _s1 ON _s1.o_custkey = customer.c_custkey WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 +GROUP BY + _s1.o_custkey diff --git a/tests/test_sql_refsols/conditional_functions_sqlite.sql b/tests/test_sql_refsols/conditional_functions_sqlite.sql index 940b627a9..49b3424a8 100644 --- a/tests/test_sql_refsols/conditional_functions_sqlite.sql +++ b/tests/test_sql_refsols/conditional_functions_sqlite.sql @@ -1,20 +1,20 @@ WITH _s1 AS ( SELECT 
o_custkey, - MIN(o_totalprice) AS min_o_totalprice + o_totalprice FROM tpch.orders - GROUP BY - 1 ) SELECT - IIF(customer.c_acctbal > 1000, 'High', 'Low') AS iff_col, - customer.c_name IN ('Alice', 'Bob', 'Charlie') AS isin_col, - COALESCE(_s1.min_o_totalprice, 0.0) AS default_val, - NOT _s1.min_o_totalprice IS NULL AS has_acct_bal, - _s1.min_o_totalprice IS NULL AS no_acct_bal, - CASE WHEN customer.c_acctbal > 0 THEN customer.c_acctbal ELSE NULL END AS no_debt_bal + IIF(MAX(customer.c_acctbal) > 1000, 'High', 'Low') AS iff_col, + MAX(customer.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, + COALESCE(MIN(_s1.o_totalprice), 0.0) AS default_val, + NOT MIN(_s1.o_totalprice) IS NULL AS has_acct_bal, + MIN(_s1.o_totalprice) IS NULL AS no_acct_bal, + CASE WHEN MAX(customer.c_acctbal) > 0 THEN MAX(customer.c_acctbal) ELSE NULL END AS no_debt_bal FROM tpch.customer AS customer LEFT JOIN _s1 AS _s1 ON _s1.o_custkey = customer.c_custkey WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 +GROUP BY + _s1.o_custkey diff --git a/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql index be8d9ee7f..1f5c1833f 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t2 AS ( SELECT car_id FROM main.inventory_snapshots @@ -8,19 +8,19 @@ WITH _t1 AS ( ), _s3 AS ( SELECT car_id, - MAX(sale_price) AS max_sale_price + sale_price FROM main.sales - GROUP BY - 1 ) SELECT - cars.make, - cars.model, - _s3.max_sale_price AS highest_sale_price + ANY_VALUE(cars.make) AS make, + ANY_VALUE(cars.model) AS model, + MAX(_s3.sale_price) AS highest_sale_price FROM main.cars AS cars -JOIN _t1 AS _t1 - ON _t1.car_id = cars._id +JOIN _t2 AS _t2 + ON _t2.car_id = cars._id LEFT JOIN _s3 AS _s3 ON _s3.car_id = cars._id +GROUP BY + _s3.car_id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql 
b/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql index 6645daaa0..8e9481527 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql @@ -7,19 +7,19 @@ WITH _t AS ( ), _s3 AS ( SELECT car_id, - MAX(sale_price) AS max_sale_price + sale_price FROM main.sales - GROUP BY - 1 ) SELECT - cars.make, - cars.model, - _s3.max_sale_price AS highest_sale_price + ANY_VALUE(cars.make) AS make, + ANY_VALUE(cars.model) AS model, + MAX(_s3.sale_price) AS highest_sale_price FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id LEFT JOIN _s3 AS _s3 ON _s3.car_id = cars._id +GROUP BY + _s3.car_id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql index 8deeb5bbf..1b75d1711 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql @@ -7,19 +7,19 @@ WITH _t AS ( ), _s3 AS ( SELECT car_id, - MAX(sale_price) AS max_sale_price + sale_price FROM main.sales - GROUP BY - 1 ) SELECT - cars.make, - cars.model, - _s3.max_sale_price AS highest_sale_price + MAX(cars.make) AS make, + MAX(cars.model) AS model, + MAX(_s3.sale_price) AS highest_sale_price FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id LEFT JOIN _s3 AS _s3 ON _s3.car_id = cars._id +GROUP BY + _s3.car_id ORDER BY 3 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql index 579066595..4dfa78a81 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t2 AS ( SELECT car_id FROM main.inventory_snapshots @@ -8,19 +8,19 @@ WITH _t1 AS ( ), _s3 AS ( SELECT car_id, - MAX(sale_price) AS 
max_sale_price + sale_price FROM main.sales - GROUP BY - 1 ) SELECT - cars.make, - cars.model, - _s3.max_sale_price AS highest_sale_price + ANY_VALUE(cars.make) AS make, + ANY_VALUE(cars.model) AS model, + MAX(_s3.sale_price) AS highest_sale_price FROM main.cars AS cars -JOIN _t1 AS _t1 - ON _t1.car_id = cars._id +JOIN _t2 AS _t2 + ON _t2.car_id = cars._id LEFT JOIN _s3 AS _s3 ON _s3.car_id = cars._id +GROUP BY + _s3.car_id ORDER BY 3 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql index 4183cfea8..78e7d7c01 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql @@ -7,19 +7,19 @@ WITH _t AS ( ), _s3 AS ( SELECT car_id, - MAX(sale_price) AS max_sale_price + sale_price FROM main.sales - GROUP BY - 1 ) SELECT - cars.make, - cars.model, - _s3.max_sale_price AS highest_sale_price + MAX(cars.make) AS make, + MAX(cars.model) AS model, + MAX(_s3.sale_price) AS highest_sale_price FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id LEFT JOIN _s3 AS _s3 ON _s3.car_id = cars._id +GROUP BY + _s3.car_id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql index 27af44320..9b15444aa 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql @@ -1,19 +1,23 @@ WITH _s3 AS ( SELECT patient_id, - MIN(EXTRACT(YEAR FROM CAST(start_dt AS DATETIME))) AS min_year_start_dt + start_dt FROM main.treatments - GROUP BY - 1 -), _t0 AS ( +), _t1 AS ( SELECT - _s3.min_year_start_dt, - COUNT(*) AS n_rows + MIN(EXTRACT(YEAR FROM CAST(_s3.start_dt AS DATETIME))) AS min_year_start_dt FROM main.patients AS patients JOIN main.treatments AS treatments ON patients.patient_id = treatments.patient_id LEFT JOIN _s3 AS _s3 ON 
_s3.patient_id = patients.patient_id + GROUP BY + _s3.patient_id +), _t0 AS ( + SELECT + min_year_start_dt, + COUNT(*) AS n_rows + FROM _t1 GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql index 91808179f..da0175bb0 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql @@ -7,14 +7,11 @@ WITH _u_0 AS ( ), _s3 AS ( SELECT patient_id, - MIN(EXTRACT(YEAR FROM CAST(start_dt AS DATETIME))) AS min_year_start_dt + start_dt FROM main.treatments - GROUP BY - 1 -), _t0 AS ( +), _t1 AS ( SELECT - _s3.min_year_start_dt, - COUNT(*) AS n_rows + MIN(EXTRACT(YEAR FROM CAST(_s3.start_dt AS DATETIME))) AS min_year_start_dt FROM main.patients AS patients LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = patients.patient_id @@ -22,6 +19,13 @@ WITH _u_0 AS ( ON _s3.patient_id = patients.patient_id WHERE NOT _u_0._u_1 IS NULL + GROUP BY + _s3.patient_id +), _t0 AS ( + SELECT + min_year_start_dt, + COUNT(*) AS n_rows + FROM _t1 GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql index 3399479cd..dab36dc15 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql @@ -7,14 +7,11 @@ WITH _u_0 AS ( ), _s3 AS ( SELECT patient_id, - MIN(EXTRACT(YEAR FROM CAST(start_dt AS TIMESTAMP))) AS min_year_start_dt + start_dt FROM main.treatments - GROUP BY - 1 -), _t0 AS ( +), _t1 AS ( SELECT - _s3.min_year_start_dt, - COUNT(*) AS n_rows + MIN(EXTRACT(YEAR FROM CAST(_s3.start_dt AS TIMESTAMP))) AS min_year_start_dt FROM main.patients AS patients LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = patients.patient_id @@ -22,6 +19,13 @@ WITH _u_0 AS ( ON _s3.patient_id = patients.patient_id WHERE NOT _u_0._u_1 IS NULL + GROUP BY + _s3.patient_id +), _t0 AS ( + SELECT + 
min_year_start_dt, + COUNT(*) AS n_rows + FROM _t1 GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql index 5f3d0ed63..246a11650 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql @@ -7,14 +7,11 @@ WITH _u_0 AS ( ), _s3 AS ( SELECT patient_id, - MIN(YEAR(CAST(start_dt AS TIMESTAMP))) AS min_year_start_dt + start_dt FROM main.treatments - GROUP BY - 1 -), _t0 AS ( +), _t1 AS ( SELECT - _s3.min_year_start_dt, - COUNT(*) AS n_rows + MIN(YEAR(CAST(_s3.start_dt AS TIMESTAMP))) AS min_year_start_dt FROM main.patients AS patients LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = patients.patient_id @@ -22,6 +19,13 @@ WITH _u_0 AS ( ON _s3.patient_id = patients.patient_id WHERE NOT _u_0._u_1 IS NULL + GROUP BY + _s3.patient_id +), _t0 AS ( + SELECT + min_year_start_dt, + COUNT(*) AS n_rows + FROM _t1 GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql index aac28d505..8ee828136 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql @@ -7,14 +7,11 @@ WITH _u_0 AS ( ), _s3 AS ( SELECT patient_id, - MIN(CAST(STRFTIME('%Y', start_dt) AS INTEGER)) AS min_year_start_dt + start_dt FROM main.treatments - GROUP BY - 1 -), _t0 AS ( +), _t1 AS ( SELECT - _s3.min_year_start_dt, - COUNT(*) AS n_rows + MIN(CAST(STRFTIME('%Y', _s3.start_dt) AS INTEGER)) AS min_year_start_dt FROM main.patients AS patients LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = patients.patient_id @@ -22,6 +19,13 @@ WITH _u_0 AS ( ON _s3.patient_id = patients.patient_id WHERE NOT _u_0._u_1 IS NULL + GROUP BY + _s3.patient_id +), _t0 AS ( + SELECT + min_year_start_dt, + COUNT(*) AS n_rows + FROM _t1 GROUP BY 1 ) diff --git 
a/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql index 4d9ff37da..f7b7df7b3 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql @@ -1,16 +1,16 @@ WITH _s1 AS ( SELECT - coupon_id, - SUM(amount) AS sum_amount + amount, + coupon_id FROM main.wallet_transactions_daily - GROUP BY - 1 ) SELECT - coupons.cid AS coupon_id, - COALESCE(_s1.sum_amount, 0) AS total_discount + _s1.coupon_id, + COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid WHERE coupons.merchant_id = '1' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql index 4d9ff37da..f7b7df7b3 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql @@ -1,16 +1,16 @@ WITH _s1 AS ( SELECT - coupon_id, - SUM(amount) AS sum_amount + amount, + coupon_id FROM main.wallet_transactions_daily - GROUP BY - 1 ) SELECT - coupons.cid AS coupon_id, - COALESCE(_s1.sum_amount, 0) AS total_discount + _s1.coupon_id, + COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid WHERE coupons.merchant_id = '1' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql index 4d9ff37da..f7b7df7b3 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql @@ -1,16 +1,16 @@ WITH _s1 AS ( SELECT - coupon_id, - SUM(amount) AS sum_amount + amount, + coupon_id FROM main.wallet_transactions_daily - GROUP BY - 1 ) SELECT - coupons.cid AS coupon_id, - COALESCE(_s1.sum_amount, 0) AS total_discount + _s1.coupon_id, + COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons 
AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid WHERE coupons.merchant_id = '1' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql index 4d9ff37da..f7b7df7b3 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql @@ -1,16 +1,16 @@ WITH _s1 AS ( SELECT - coupon_id, - SUM(amount) AS sum_amount + amount, + coupon_id FROM main.wallet_transactions_daily - GROUP BY - 1 ) SELECT - coupons.cid AS coupon_id, - COALESCE(_s1.sum_amount, 0) AS total_discount + _s1.coupon_id, + COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid WHERE coupons.merchant_id = '1' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql index 4d9ff37da..f7b7df7b3 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql @@ -1,16 +1,16 @@ WITH _s1 AS ( SELECT - coupon_id, - SUM(amount) AS sum_amount + amount, + coupon_id FROM main.wallet_transactions_daily - GROUP BY - 1 ) SELECT - coupons.cid AS coupon_id, - COALESCE(_s1.sum_amount, 0) AS total_discount + _s1.coupon_id, + COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid WHERE coupons.merchant_id = '1' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql b/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql index 805d5b52f..a2979cbc9 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT merchant_id, start_date @@ -7,29 +7,36 @@ WITH _t0 AS ( SELECT merchant_id, MIN(start_date) AS min_start_date - FROM _t0 + FROM _t1 GROUP BY 1 
), _s3 AS ( SELECT + cid, merchant_id, - start_date, - MAX(cid) AS max_cid + start_date FROM main.coupons +), _s4 AS ( + SELECT + _s3.merchant_id, + _s3.start_date, + ANY_VALUE(merchants.created_at) AS anything_created_at, + MAX(_s3.cid) AS max_cid + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.merchant_id = merchants.mid + LEFT JOIN _s3 AS _s3 + ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid GROUP BY 1, 2 ) SELECT - merchants.mid AS merchants_id, - merchants.created_at AS merchant_registration_date, - _s1.min_start_date AS earliest_coupon_start_date, - _s3.max_cid AS earliest_coupon_id -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.merchant_id = merchants.mid -LEFT JOIN _s3 AS _s3 - ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid -JOIN _t0 AS _s5 - ON _s5.merchant_id = merchants.mid - AND _s5.start_date <= DATE_ADD(CAST(merchants.created_at AS TIMESTAMP), 1, 'YEAR') + _s4.merchant_id AS merchants_id, + _s4.anything_created_at AS merchant_registration_date, + _s4.start_date AS earliest_coupon_start_date, + _s4.max_cid AS earliest_coupon_id +FROM _s4 AS _s4 +JOIN _t1 AS _s5 + ON _s4.merchant_id = _s5.merchant_id + AND _s5.start_date <= DATE_ADD(CAST(_s4.anything_created_at AS TIMESTAMP), 1, 'YEAR') diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql b/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql index 65fdd6894..fa0f503e2 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT merchant_id, start_date @@ -7,29 +7,36 @@ WITH _t0 AS ( SELECT merchant_id, MIN(start_date) AS min_start_date - FROM _t0 + FROM _t1 GROUP BY 1 ), _s3 AS ( SELECT + cid, merchant_id, - start_date, - MAX(cid) AS max_cid + start_date FROM main.coupons +), _s4 AS ( + SELECT + _s3.merchant_id, + _s3.start_date, + ANY_VALUE(merchants.created_at) AS anything_created_at, + 
MAX(_s3.cid) AS max_cid + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.merchant_id = merchants.mid + LEFT JOIN _s3 AS _s3 + ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid GROUP BY 1, 2 ) SELECT - merchants.mid AS merchants_id, - merchants.created_at AS merchant_registration_date, - _s1.min_start_date AS earliest_coupon_start_date, - _s3.max_cid AS earliest_coupon_id -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.merchant_id = merchants.mid -LEFT JOIN _s3 AS _s3 - ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid -JOIN _t0 AS _s5 - ON _s5.merchant_id = merchants.mid - AND _s5.start_date <= DATE_ADD(CAST(merchants.created_at AS DATETIME), INTERVAL '1' YEAR) + _s4.merchant_id AS merchants_id, + _s4.anything_created_at AS merchant_registration_date, + _s4.start_date AS earliest_coupon_start_date, + _s4.max_cid AS earliest_coupon_id +FROM _s4 AS _s4 +JOIN _t1 AS _s5 + ON _s4.merchant_id = _s5.merchant_id + AND _s5.start_date <= DATE_ADD(CAST(_s4.anything_created_at AS DATETIME), INTERVAL '1' YEAR) diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql b/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql index be36c89fb..5004a06ec 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT merchant_id, start_date @@ -7,29 +7,36 @@ WITH _t0 AS ( SELECT merchant_id, MIN(start_date) AS min_start_date - FROM _t0 + FROM _t1 GROUP BY 1 ), _s3 AS ( SELECT + cid, merchant_id, - start_date, - MAX(cid) AS max_cid + start_date FROM main.coupons +), _s4 AS ( + SELECT + _s3.merchant_id, + _s3.start_date, + MAX(merchants.created_at) AS anything_created_at, + MAX(_s3.cid) AS max_cid + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.merchant_id = merchants.mid + LEFT JOIN _s3 AS _s3 + ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = 
merchants.mid GROUP BY 1, 2 ) SELECT - merchants.mid AS merchants_id, - merchants.created_at AS merchant_registration_date, - _s1.min_start_date AS earliest_coupon_start_date, - _s3.max_cid AS earliest_coupon_id -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.merchant_id = merchants.mid -LEFT JOIN _s3 AS _s3 - ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid -JOIN _t0 AS _s5 - ON _s5.merchant_id = merchants.mid - AND _s5.start_date <= CAST(merchants.created_at AS TIMESTAMP) + INTERVAL '1 YEAR' + _s4.merchant_id AS merchants_id, + _s4.anything_created_at AS merchant_registration_date, + _s4.start_date AS earliest_coupon_start_date, + _s4.max_cid AS earliest_coupon_id +FROM _s4 AS _s4 +JOIN _t1 AS _s5 + ON _s4.merchant_id = _s5.merchant_id + AND _s5.start_date <= CAST(_s4.anything_created_at AS TIMESTAMP) + INTERVAL '1 YEAR' diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql index 867b81dd2..7ebb2f8df 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT merchant_id, start_date @@ -7,29 +7,36 @@ WITH _t0 AS ( SELECT merchant_id, MIN(start_date) AS min_start_date - FROM _t0 + FROM _t1 GROUP BY 1 ), _s3 AS ( SELECT + cid, merchant_id, - start_date, - MAX(cid) AS max_cid + start_date FROM main.coupons +), _s4 AS ( + SELECT + _s3.merchant_id, + _s3.start_date, + ANY_VALUE(merchants.created_at) AS anything_created_at, + MAX(_s3.cid) AS max_cid + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.merchant_id = merchants.mid + LEFT JOIN _s3 AS _s3 + ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid GROUP BY 1, 2 ) SELECT - merchants.mid AS merchants_id, - merchants.created_at AS merchant_registration_date, - _s1.min_start_date AS earliest_coupon_start_date, - _s3.max_cid AS 
earliest_coupon_id -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.merchant_id = merchants.mid -LEFT JOIN _s3 AS _s3 - ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid -JOIN _t0 AS _s5 - ON _s5.merchant_id = merchants.mid - AND _s5.start_date <= DATEADD(YEAR, 1, CAST(merchants.created_at AS TIMESTAMP)) + _s4.merchant_id AS merchants_id, + _s4.anything_created_at AS merchant_registration_date, + _s4.start_date AS earliest_coupon_start_date, + _s4.max_cid AS earliest_coupon_id +FROM _s4 AS _s4 +JOIN _t1 AS _s5 + ON _s4.merchant_id = _s5.merchant_id + AND _s5.start_date <= DATEADD(YEAR, 1, CAST(_s4.anything_created_at AS TIMESTAMP)) diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql index ffd54f0de..d7ad86900 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT merchant_id, start_date @@ -7,29 +7,36 @@ WITH _t0 AS ( SELECT merchant_id, MIN(start_date) AS min_start_date - FROM _t0 + FROM _t1 GROUP BY 1 ), _s3 AS ( SELECT + cid, merchant_id, - start_date, - MAX(cid) AS max_cid + start_date FROM main.coupons +), _s4 AS ( + SELECT + _s3.merchant_id, + _s3.start_date, + MAX(merchants.created_at) AS anything_created_at, + MAX(_s3.cid) AS max_cid + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.merchant_id = merchants.mid + LEFT JOIN _s3 AS _s3 + ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid GROUP BY 1, 2 ) SELECT - merchants.mid AS merchants_id, - merchants.created_at AS merchant_registration_date, - _s1.min_start_date AS earliest_coupon_start_date, - _s3.max_cid AS earliest_coupon_id -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.merchant_id = merchants.mid -LEFT JOIN _s3 AS _s3 - ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid -JOIN _t0 AS _s5 - ON 
_s5.merchant_id = merchants.mid - AND _s5.start_date <= DATETIME(merchants.created_at, '1 year') + _s4.merchant_id AS merchants_id, + _s4.anything_created_at AS merchant_registration_date, + _s4.start_date AS earliest_coupon_start_date, + _s4.max_cid AS earliest_coupon_id +FROM _s4 AS _s4 +JOIN _t1 AS _s5 + ON _s4.merchant_id = _s5.merchant_id + AND _s5.start_date <= DATETIME(_s4.anything_created_at, '1 year') diff --git a/tests/test_sql_refsols/quantile_test_2_ansi.sql b/tests/test_sql_refsols/quantile_test_2_ansi.sql index 4ba2e6135..3bd64c4cb 100644 --- a/tests/test_sql_refsols/quantile_test_2_ansi.sql +++ b/tests/test_sql_refsols/quantile_test_2_ansi.sql @@ -10,47 +10,39 @@ WITH _s0 AS ( ), _s5 AS ( SELECT customer.c_nationkey, - PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS agg_0, - PERCENTILE_DISC(0.01) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS agg_1, - PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS agg_2, - PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS agg_3, - PERCENTILE_DISC(0.9) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS agg_4, - PERCENTILE_DISC(0.99) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS agg_5, - PERCENTILE_DISC(1.0) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS agg_6, - PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS agg_7, - PERCENTILE_DISC(0.0) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS agg_8 + orders.o_totalprice FROM tpch.customer AS customer JOIN tpch.orders AS orders ON EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) = 1998 AND customer.c_custkey = orders.o_custkey - GROUP BY - 1 ) SELECT - region.r_name AS region_name, - _s0.n_name AS nation_name, - _s5.agg_8 AS orders_min, - _s5.agg_1 AS orders_1_percent, - _s5.agg_0 AS orders_10_percent, - _s5.agg_2 AS orders_25_percent, - _s5.agg_7 AS orders_median, - _s5.agg_3 
AS orders_75_percent, - _s5.agg_4 AS orders_90_percent, - _s5.agg_5 AS orders_99_percent, - _s5.agg_6 AS orders_max + ANY_VALUE(region.r_name) AS region_name, + ANY_VALUE(_s0.n_name) AS nation_name, + PERCENTILE_DISC(0.0) WITHIN GROUP (ORDER BY + _s5.o_totalprice NULLS LAST) AS orders_min, + PERCENTILE_DISC(0.01) WITHIN GROUP (ORDER BY + _s5.o_totalprice NULLS LAST) AS orders_1_percent, + PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY + _s5.o_totalprice NULLS LAST) AS orders_10_percent, + PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY + _s5.o_totalprice NULLS LAST) AS orders_25_percent, + PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY + _s5.o_totalprice NULLS LAST) AS orders_median, + PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY + _s5.o_totalprice NULLS LAST) AS orders_75_percent, + PERCENTILE_DISC(0.9) WITHIN GROUP (ORDER BY + _s5.o_totalprice NULLS LAST) AS orders_90_percent, + PERCENTILE_DISC(0.99) WITHIN GROUP (ORDER BY + _s5.o_totalprice NULLS LAST) AS orders_99_percent, + PERCENTILE_DISC(1.0) WITHIN GROUP (ORDER BY + _s5.o_totalprice NULLS LAST) AS orders_max FROM _s0 AS _s0 JOIN tpch.region AS region ON _s0.n_regionkey = region.r_regionkey LEFT JOIN _s5 AS _s5 ON _s0.n_nationkey = _s5.c_nationkey +GROUP BY + _s5.c_nationkey ORDER BY 2 diff --git a/tests/test_sql_refsols/quantile_test_2_mysql.sql b/tests/test_sql_refsols/quantile_test_2_mysql.sql index a406ab474..d55c23d33 100644 --- a/tests/test_sql_refsols/quantile_test_2_mysql.sql +++ b/tests/test_sql_refsols/quantile_test_2_mysql.sql @@ -7,110 +7,101 @@ WITH _s0 AS ( ORDER BY 1 LIMIT 5 -), _t2 AS ( +), _s5 AS ( SELECT CUSTOMER.c_nationkey, - ORDERS.o_totalprice, + ORDERS.o_totalprice + FROM tpch.CUSTOMER AS CUSTOMER + JOIN tpch.ORDERS AS ORDERS + ON CUSTOMER.c_custkey = ORDERS.o_custkey + AND EXTRACT(YEAR FROM CAST(ORDERS.o_orderdate AS DATETIME)) = 1998 +), _t1 AS ( + SELECT + _s5.c_nationkey, + _s0.n_name, + _s5.o_totalprice, + REGION.r_name, CASE WHEN TRUNCATE( - CAST(0.99 * COUNT(ORDERS.o_totalprice) OVER 
(PARTITION BY CUSTOMER.c_nationkey) AS FLOAT), + CAST(0.99 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY ORDERS.o_totalprice DESC) - THEN ORDERS.o_totalprice + ) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_10, CASE WHEN TRUNCATE( - CAST(0.75 * COUNT(ORDERS.o_totalprice) OVER (PARTITION BY CUSTOMER.c_nationkey) AS FLOAT), + CAST(0.75 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY ORDERS.o_totalprice DESC) - THEN ORDERS.o_totalprice + ) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_11, CASE WHEN TRUNCATE( - CAST(0.25 * COUNT(ORDERS.o_totalprice) OVER (PARTITION BY CUSTOMER.c_nationkey) AS FLOAT), + CAST(0.25 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY ORDERS.o_totalprice DESC) - THEN ORDERS.o_totalprice + ) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_12, CASE WHEN TRUNCATE( - CAST(0.09999999999999998 * COUNT(ORDERS.o_totalprice) OVER (PARTITION BY CUSTOMER.c_nationkey) AS FLOAT), + CAST(0.09999999999999998 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY ORDERS.o_totalprice DESC) - THEN ORDERS.o_totalprice + ) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_13, CASE WHEN TRUNCATE( - CAST(0.010000000000000009 * COUNT(ORDERS.o_totalprice) OVER (PARTITION BY CUSTOMER.c_nationkey) AS FLOAT), + CAST(0.010000000000000009 * COUNT(_s5.o_totalprice) OVER (PARTITION BY 
_s5.c_nationkey) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY ORDERS.o_totalprice DESC) - THEN ORDERS.o_totalprice + ) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_14, CASE WHEN TRUNCATE( - CAST(0.5 * COUNT(ORDERS.o_totalprice) OVER (PARTITION BY CUSTOMER.c_nationkey) AS FLOAT), + CAST(0.5 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY ORDERS.o_totalprice DESC) - THEN ORDERS.o_totalprice + ) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_16, CASE - WHEN TRUNCATE( - CAST(COUNT(ORDERS.o_totalprice) OVER (PARTITION BY CUSTOMER.c_nationkey) AS FLOAT), - 0 - ) < ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY ORDERS.o_totalprice DESC) - THEN ORDERS.o_totalprice + WHEN TRUNCATE(CAST(COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS FLOAT), 0) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_17, CASE WHEN TRUNCATE( - CAST(0.9 * COUNT(ORDERS.o_totalprice) OVER (PARTITION BY CUSTOMER.c_nationkey) AS FLOAT), + CAST(0.9 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CUSTOMER.c_nationkey ORDER BY ORDERS.o_totalprice DESC) - THEN ORDERS.o_totalprice + ) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_9 - FROM tpch.CUSTOMER AS CUSTOMER - JOIN tpch.ORDERS AS ORDERS - ON CUSTOMER.c_custkey = ORDERS.o_custkey - AND EXTRACT(YEAR FROM CAST(ORDERS.o_orderdate AS DATETIME)) = 1998 -), _s5 AS ( - SELECT - c_nationkey, - MAX(expr_10) AS max_expr_10, - MAX(expr_11) AS max_expr_11, - MAX(expr_12) AS max_expr_12, - MAX(expr_13) AS max_expr_13, - 
MAX(expr_14) AS max_expr_14, - MAX(expr_16) AS max_expr_16, - MAX(expr_17) AS max_expr_17, - MAX(expr_9) AS max_expr_9, - MAX(o_totalprice) AS max_o_totalprice - FROM _t2 - GROUP BY - 1 + FROM _s0 AS _s0 + JOIN tpch.REGION AS REGION + ON REGION.r_regionkey = _s0.n_regionkey + LEFT JOIN _s5 AS _s5 + ON _s0.n_nationkey = _s5.c_nationkey ) SELECT - REGION.r_name AS region_name, - _s0.n_name COLLATE utf8mb4_bin AS nation_name, - _s5.max_expr_17 AS orders_min, - _s5.max_expr_10 AS orders_1_percent, - _s5.max_expr_9 AS orders_10_percent, - _s5.max_expr_11 AS orders_25_percent, - _s5.max_expr_16 AS orders_median, - _s5.max_expr_12 AS orders_75_percent, - _s5.max_expr_13 AS orders_90_percent, - _s5.max_expr_14 AS orders_99_percent, - _s5.max_o_totalprice AS orders_max -FROM _s0 AS _s0 -JOIN tpch.REGION AS REGION - ON REGION.r_regionkey = _s0.n_regionkey -LEFT JOIN _s5 AS _s5 - ON _s0.n_nationkey = _s5.c_nationkey + ANY_VALUE(r_name) AS region_name, + ANY_VALUE(n_name) COLLATE utf8mb4_bin AS nation_name, + MAX(expr_17) AS orders_min, + MAX(expr_10) AS orders_1_percent, + MAX(expr_9) AS orders_10_percent, + MAX(expr_11) AS orders_25_percent, + MAX(expr_16) AS orders_median, + MAX(expr_12) AS orders_75_percent, + MAX(expr_13) AS orders_90_percent, + MAX(expr_14) AS orders_99_percent, + MAX(o_totalprice) AS orders_max +FROM _t1 +GROUP BY + c_nationkey ORDER BY 2 diff --git a/tests/test_sql_refsols/quantile_test_2_postgres.sql b/tests/test_sql_refsols/quantile_test_2_postgres.sql index 0f38ecac2..0e7d236ee 100644 --- a/tests/test_sql_refsols/quantile_test_2_postgres.sql +++ b/tests/test_sql_refsols/quantile_test_2_postgres.sql @@ -10,47 +10,39 @@ WITH _s0 AS ( ), _s5 AS ( SELECT customer.c_nationkey, - PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_0, - PERCENTILE_DISC(0.01) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_1, - PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_2, - PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER 
BY - orders.o_totalprice) AS agg_3, - PERCENTILE_DISC(0.9) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_4, - PERCENTILE_DISC(0.99) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_5, - PERCENTILE_DISC(1.0) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_6, - PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_7, - PERCENTILE_DISC(0.0) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_8 + orders.o_totalprice FROM tpch.customer AS customer JOIN tpch.orders AS orders ON EXTRACT(YEAR FROM CAST(orders.o_orderdate AS TIMESTAMP)) = 1998 AND customer.c_custkey = orders.o_custkey - GROUP BY - 1 ) SELECT - region.r_name AS region_name, - _s0.n_name AS nation_name, - _s5.agg_8 AS orders_min, - _s5.agg_1 AS orders_1_percent, - _s5.agg_0 AS orders_10_percent, - _s5.agg_2 AS orders_25_percent, - _s5.agg_7 AS orders_median, - _s5.agg_3 AS orders_75_percent, - _s5.agg_4 AS orders_90_percent, - _s5.agg_5 AS orders_99_percent, - _s5.agg_6 AS orders_max + MAX(region.r_name) AS region_name, + MAX(_s0.n_name) AS nation_name, + PERCENTILE_DISC(0.0) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_min, + PERCENTILE_DISC(0.01) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_1_percent, + PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_10_percent, + PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_25_percent, + PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_median, + PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_75_percent, + PERCENTILE_DISC(0.9) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_90_percent, + PERCENTILE_DISC(0.99) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_99_percent, + PERCENTILE_DISC(1.0) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_max FROM _s0 AS _s0 JOIN tpch.region AS region ON _s0.n_regionkey = region.r_regionkey LEFT JOIN _s5 AS _s5 ON _s0.n_nationkey = _s5.c_nationkey +GROUP BY + 
_s5.c_nationkey ORDER BY 2 NULLS FIRST diff --git a/tests/test_sql_refsols/quantile_test_2_snowflake.sql b/tests/test_sql_refsols/quantile_test_2_snowflake.sql index ab146d061..022e21507 100644 --- a/tests/test_sql_refsols/quantile_test_2_snowflake.sql +++ b/tests/test_sql_refsols/quantile_test_2_snowflake.sql @@ -10,47 +10,39 @@ WITH _s0 AS ( ), _s5 AS ( SELECT customer.c_nationkey, - PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_0, - PERCENTILE_DISC(0.01) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_1, - PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_2, - PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_3, - PERCENTILE_DISC(0.9) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_4, - PERCENTILE_DISC(0.99) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_5, - PERCENTILE_DISC(1.0) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_6, - PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_7, - PERCENTILE_DISC(0.0) WITHIN GROUP (ORDER BY - orders.o_totalprice) AS agg_8 + orders.o_totalprice FROM tpch.customer AS customer JOIN tpch.orders AS orders ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1998 AND customer.c_custkey = orders.o_custkey - GROUP BY - 1 ) SELECT - region.r_name AS region_name, - _s0.n_name AS nation_name, - _s5.agg_8 AS orders_min, - _s5.agg_1 AS orders_1_percent, - _s5.agg_0 AS orders_10_percent, - _s5.agg_2 AS orders_25_percent, - _s5.agg_7 AS orders_median, - _s5.agg_3 AS orders_75_percent, - _s5.agg_4 AS orders_90_percent, - _s5.agg_5 AS orders_99_percent, - _s5.agg_6 AS orders_max + ANY_VALUE(region.r_name) AS region_name, + ANY_VALUE(_s0.n_name) AS nation_name, + PERCENTILE_DISC(0.0) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_min, + PERCENTILE_DISC(0.01) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_1_percent, + PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_10_percent, + 
PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_25_percent, + PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_median, + PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_75_percent, + PERCENTILE_DISC(0.9) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_90_percent, + PERCENTILE_DISC(0.99) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_99_percent, + PERCENTILE_DISC(1.0) WITHIN GROUP (ORDER BY + _s5.o_totalprice) AS orders_max FROM _s0 AS _s0 JOIN tpch.region AS region ON _s0.n_regionkey = region.r_regionkey LEFT JOIN _s5 AS _s5 ON _s0.n_nationkey = _s5.c_nationkey +GROUP BY + _s5.c_nationkey ORDER BY 2 NULLS FIRST diff --git a/tests/test_sql_refsols/quantile_test_2_sqlite.sql b/tests/test_sql_refsols/quantile_test_2_sqlite.sql index 4f083adf1..ca3c12a6e 100644 --- a/tests/test_sql_refsols/quantile_test_2_sqlite.sql +++ b/tests/test_sql_refsols/quantile_test_2_sqlite.sql @@ -7,86 +7,80 @@ WITH _s0 AS ( ORDER BY 1 LIMIT 5 -), _t1 AS ( +), _s5 AS ( SELECT customer.c_nationkey, - orders.o_totalprice, + orders.o_totalprice + FROM tpch.customer AS customer + JOIN tpch.orders AS orders + ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1998 + AND customer.c_custkey = orders.o_custkey +), _t1 AS ( + SELECT + _s5.c_nationkey, + _s0.n_name, + _s5.o_totalprice, + region.r_name, CASE - WHEN CAST(0.99 * COUNT(orders.o_totalprice) OVER (PARTITION BY customer.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY orders.o_totalprice DESC) - THEN orders.o_totalprice + WHEN CAST(0.99 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_10, CASE - WHEN CAST(0.75 * COUNT(orders.o_totalprice) OVER (PARTITION BY customer.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY 
orders.o_totalprice DESC) - THEN orders.o_totalprice + WHEN CAST(0.75 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_11, CASE - WHEN CAST(0.25 * COUNT(orders.o_totalprice) OVER (PARTITION BY customer.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY orders.o_totalprice DESC) - THEN orders.o_totalprice + WHEN CAST(0.25 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_12, CASE - WHEN CAST(0.09999999999999998 * COUNT(orders.o_totalprice) OVER (PARTITION BY customer.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY orders.o_totalprice DESC) - THEN orders.o_totalprice + WHEN CAST(0.09999999999999998 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_13, CASE - WHEN CAST(0.010000000000000009 * COUNT(orders.o_totalprice) OVER (PARTITION BY customer.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY orders.o_totalprice DESC) - THEN orders.o_totalprice + WHEN CAST(0.010000000000000009 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_14, CASE - WHEN CAST(0.5 * COUNT(orders.o_totalprice) OVER (PARTITION BY customer.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY orders.o_totalprice DESC) - THEN orders.o_totalprice + WHEN CAST(0.5 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER 
(PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_16, CASE - WHEN CAST(COUNT(orders.o_totalprice) OVER (PARTITION BY customer.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY orders.o_totalprice DESC) - THEN orders.o_totalprice + WHEN CAST(COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_17, CASE - WHEN CAST(0.9 * COUNT(orders.o_totalprice) OVER (PARTITION BY customer.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY orders.o_totalprice DESC) - THEN orders.o_totalprice + WHEN CAST(0.9 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice ELSE NULL END AS expr_9 - FROM tpch.customer AS customer - JOIN tpch.orders AS orders - ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1998 - AND customer.c_custkey = orders.o_custkey -), _s5 AS ( - SELECT - c_nationkey, - MAX(expr_10) AS max_expr_10, - MAX(expr_11) AS max_expr_11, - MAX(expr_12) AS max_expr_12, - MAX(expr_13) AS max_expr_13, - MAX(expr_14) AS max_expr_14, - MAX(expr_16) AS max_expr_16, - MAX(expr_17) AS max_expr_17, - MAX(expr_9) AS max_expr_9, - MAX(o_totalprice) AS max_o_totalprice - FROM _t1 - GROUP BY - 1 + FROM _s0 AS _s0 + JOIN tpch.region AS region + ON _s0.n_regionkey = region.r_regionkey + LEFT JOIN _s5 AS _s5 + ON _s0.n_nationkey = _s5.c_nationkey ) SELECT - region.r_name AS region_name, - _s0.n_name AS nation_name, - _s5.max_expr_17 AS orders_min, - _s5.max_expr_10 AS orders_1_percent, - _s5.max_expr_9 AS orders_10_percent, - _s5.max_expr_11 AS orders_25_percent, - _s5.max_expr_16 AS orders_median, - _s5.max_expr_12 AS orders_75_percent, - _s5.max_expr_13 AS orders_90_percent, - 
_s5.max_expr_14 AS orders_99_percent, - _s5.max_o_totalprice AS orders_max -FROM _s0 AS _s0 -JOIN tpch.region AS region - ON _s0.n_regionkey = region.r_regionkey -LEFT JOIN _s5 AS _s5 - ON _s0.n_nationkey = _s5.c_nationkey + MAX(r_name) AS region_name, + MAX(n_name) AS nation_name, + MAX(expr_17) AS orders_min, + MAX(expr_10) AS orders_1_percent, + MAX(expr_9) AS orders_10_percent, + MAX(expr_11) AS orders_25_percent, + MAX(expr_16) AS orders_median, + MAX(expr_12) AS orders_75_percent, + MAX(expr_13) AS orders_90_percent, + MAX(expr_14) AS orders_99_percent, + MAX(o_totalprice) AS orders_max +FROM _t1 +GROUP BY + c_nationkey ORDER BY 2 From 88e74b2ace8f007eba1da96415bf5d4c811081b6 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 20 Oct 2025 15:00:15 -0400 Subject: [PATCH 111/143] Adding COUNT(*) left case, WIP --- .../conversion/join_aggregate_transpose.py | 2 +- tests/test_plan_refsols/common_prefix_ag.txt | 65 +++++++++---------- tests/test_plan_refsols/common_prefix_ah.txt | 30 ++++----- tests/test_plan_refsols/common_prefix_ai.txt | 60 +++++++++-------- tests/test_plan_refsols/common_prefix_aj.txt | 65 +++++++++---------- tests/test_plan_refsols/common_prefix_ak.txt | 61 +++++++++-------- tests/test_plan_refsols/common_prefix_an.txt | 30 ++++----- tests/test_plan_refsols/common_prefix_ao.txt | 44 ++++++------- tests/test_plan_refsols/common_prefix_u.txt | 12 ++-- tests/test_plan_refsols/common_prefix_x.txt | 12 ++-- tests/test_plan_refsols/common_prefix_y.txt | 16 ++--- tests/test_plan_refsols/correl_1.txt | 8 +-- tests/test_plan_refsols/correl_2.txt | 14 ++-- .../cryptbank_general_join_01_raw.txt | 17 +++-- .../cryptbank_general_join_01_rewrite.txt | 17 +++-- .../epoch_intra_season_searches.txt | 12 ++-- tests/test_plan_refsols/singular7.txt | 15 ++--- ...chnograph_country_combination_analysis.txt | 18 ++--- ...nograph_country_incident_rate_analysis.txt | 14 ++-- ..._error_rate_sun_set_by_factory_country.txt | 13 ++-- 
tests/test_plan_refsols/tpch_q21.txt | 14 ++-- tests/test_plan_refsols/tpch_q22.txt | 22 +++---- .../window_filter_order_1.txt | 14 ++-- .../window_filter_order_2.txt | 14 ++-- .../window_filter_order_3.txt | 14 ++-- .../window_filter_order_8.txt | 14 ++-- tests/test_sql_refsols/correl_1_sqlite.sql | 17 +++-- tests/test_sql_refsols/correl_2_sqlite.sql | 19 +++--- .../cryptbank_general_join_01_raw_sqlite.sql | 22 +++---- ...yptbank_general_join_01_rewrite_sqlite.sql | 22 +++---- .../defog_broker_adv10_ansi.sql | 22 +++---- .../defog_broker_adv10_mysql.sql | 22 +++---- .../defog_broker_adv10_postgres.sql | 22 +++---- .../defog_broker_adv10_snowflake.sql | 22 +++---- .../defog_broker_adv10_sqlite.sql | 22 +++---- .../defog_dealership_adv3_ansi.sql | 13 ++-- .../defog_dealership_adv3_mysql.sql | 13 ++-- .../defog_dealership_adv3_postgres.sql | 13 ++-- .../defog_dealership_adv3_snowflake.sql | 13 ++-- .../defog_dealership_adv3_sqlite.sql | 13 ++-- .../defog_dealership_adv4_ansi.sql | 17 ++--- .../defog_dealership_adv4_mysql.sql | 17 ++--- .../defog_dealership_adv4_postgres.sql | 17 ++--- .../defog_dealership_adv4_snowflake.sql | 17 ++--- .../defog_dealership_adv4_sqlite.sql | 17 ++--- .../defog_ewallet_basic10_ansi.sql | 15 ++--- .../defog_ewallet_basic10_mysql.sql | 15 ++--- .../defog_ewallet_basic10_postgres.sql | 15 ++--- .../defog_ewallet_basic10_snowflake.sql | 15 ++--- .../defog_ewallet_basic10_sqlite.sql | 15 ++--- .../defog_ewallet_basic8_ansi.sql | 14 ++-- .../defog_ewallet_basic8_mysql.sql | 14 ++-- .../defog_ewallet_basic8_postgres.sql | 14 ++-- .../defog_ewallet_basic8_snowflake.sql | 14 ++-- .../defog_ewallet_basic8_sqlite.sql | 14 ++-- .../epoch_intra_season_searches_ansi.sql | 23 ++++--- .../epoch_intra_season_searches_mysql.sql | 23 ++++--- .../epoch_intra_season_searches_postgres.sql | 23 ++++--- .../epoch_intra_season_searches_snowflake.sql | 23 ++++--- .../epoch_intra_season_searches_sqlite.sql | 23 ++++--- 
...raph_country_combination_analysis_ansi.sql | 25 ++++--- ...aph_country_combination_analysis_mysql.sql | 25 ++++--- ..._country_combination_analysis_postgres.sql | 25 ++++--- ...country_combination_analysis_snowflake.sql | 25 ++++--- ...ph_country_combination_analysis_sqlite.sql | 25 ++++--- ...ph_country_incident_rate_analysis_ansi.sql | 24 +++---- ...h_country_incident_rate_analysis_mysql.sql | 24 +++---- ...ountry_incident_rate_analysis_postgres.sql | 24 +++---- ...untry_incident_rate_analysis_snowflake.sql | 24 +++---- ..._country_incident_rate_analysis_sqlite.sql | 24 +++---- ...r_rate_sun_set_by_factory_country_ansi.sql | 24 ++++--- ..._rate_sun_set_by_factory_country_mysql.sql | 24 ++++--- ...te_sun_set_by_factory_country_postgres.sql | 24 ++++--- ...e_sun_set_by_factory_country_snowflake.sql | 24 ++++--- ...rate_sun_set_by_factory_country_sqlite.sql | 24 ++++--- tests/test_sql_refsols/tpch_q21_ansi.sql | 11 ++-- tests/test_sql_refsols/tpch_q21_mysql.sql | 11 ++-- tests/test_sql_refsols/tpch_q21_postgres.sql | 11 ++-- tests/test_sql_refsols/tpch_q21_snowflake.sql | 11 ++-- tests/test_sql_refsols/tpch_q21_sqlite.sql | 11 ++-- tests/test_sql_refsols/tpch_q22_ansi.sql | 29 +++++---- tests/test_sql_refsols/tpch_q22_mysql.sql | 29 +++++---- tests/test_sql_refsols/tpch_q22_postgres.sql | 29 +++++---- tests/test_sql_refsols/tpch_q22_snowflake.sql | 29 +++++---- tests/test_sql_refsols/tpch_q22_sqlite.sql | 29 +++++---- 85 files changed, 882 insertions(+), 900 deletions(-) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index 9b81026be..d315a86e4 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -33,7 +33,7 @@ class JoinAggregateTransposeShuttle(RelationalShuttle): """ left_join_case_ops = { - # pydop.COUNT, + pydop.COUNT, pydop.MIN, pydop.MAX, pydop.SUM, diff --git a/tests/test_plan_refsols/common_prefix_ag.txt 
b/tests/test_plan_refsols/common_prefix_ag.txt index e1f56b3de..7874aaef6 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,16 +1,16 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - 
FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_n_rows': COUNT(), 'sum_sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': 
t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) 
JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) @@ -21,23 +21,22 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 
'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - 
JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 
'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index f8ece17be..b510473c6 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,18 +1,18 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, 
type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': 
t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index 3a5ab9fc1..ff8d344eb 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,31 +1,29 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - 
SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_n_rows': COUNT(), 'sum_sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 
'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index f5d093868..632a4a221 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,16 +1,16 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': 
r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_n_rows': COUNT(), 'sum_sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': 
t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) @@ -21,23 +21,22 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 
'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 
'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git 
a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 3b9d79163..a480a8f39 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,16 +1,16 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows)], orderings=[(anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 
'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows)], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_n_rows': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) @@ -21,21 +21,20 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - 
JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - 
FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 
'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 6a01e42e5..4f7765ee4 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,18 +1,18 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', agg_1)], orderings=[(c_custkey):asc_first]) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': 
t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('cust_key', anything_o_custkey), ('n_orders', n_rows), ('n_no_tax_discount', anything_n_rows_0)], orderings=[(anything_o_custkey):asc_first]) + FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & n_rows > RELAVG(args=[n_rows], partition=[anything_c_nationkey], order=[]) & sum_n_rows > 0:numeric, columns={'anything_n_rows_0': anything_n_rows_0, 'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) + 
AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'anything_c_nationkey': ANYTHING(c_nationkey), 'anything_n_rows_0': ANYTHING(n_rows_0), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'n_rows_0': t0.n_rows}) + LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': 
t0.l_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index e5965da48..9b07af230 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,25 +1,25 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', n_rows), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) - LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': 
c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('cust_key', o_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, 0:numeric)), ('n_no_tax_discount', n_rows), ('n_part_purchases', anything_sum_n_rows)], orderings=[(o_custkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(anything_n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(anything_n_rows, 0:numeric)], partition=[], order=[]), columns={'anything_n_rows': anything_n_rows, 'anything_sum_n_rows': anything_sum_n_rows, 'n_rows': n_rows, 'o_custkey': o_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_n_rows': 
ANYTHING(n_rows), 'anything_sum_n_rows': ANYTHING(sum_n_rows), 'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey, 'sum_n_rows': t0.sum_n_rows}) + LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + 
JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index 5129a5674..4a94a7d76 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,14 +1,14 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 
'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_quantity': t1.l_quantity, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 
'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index e1afb10cf..aedddfbd4 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,10 +1,10 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) + 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index a5ae2d504..2cf917080 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'c_name': c_name, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('name', anything_c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (anything_c_name):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'anything_c_name': anything_c_name, 'n_rows': n_rows}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == 
t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index 352d9d69f..a9637676f 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('region_name', r_name), ('n_prefix_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('region_name', anything_r_name), ('n_prefix_nations', n_rows)], orderings=[(anything_r_name):asc_first]) + AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'anything_r_name': ANYTHING(r_name), 'n_rows': COUNT()}) + JOIN(condition=SLICE(t0.r_name, 
None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_2.txt b/tests/test_plan_refsols/correl_2.txt index bf42b8cd6..74b9a55e0 100644 --- a/tests/test_plan_refsols/correl_2.txt +++ b/tests/test_plan_refsols/correl_2.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('name', n_name), ('n_selected_custs', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(n_name):asc_first]) - JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == t1.expr_1 & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) - FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey, 'expr_1': SLICE(c_comment, None:unknown, 1:numeric, None:unknown)}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('name', anything_n_name), ('n_selected_custs', n_rows)], orderings=[(anything_n_name):asc_first]) + AGGREGATE(keys={'c_nationkey': c_nationkey, 'expr_1': SLICE(c_comment, None:unknown, 1:numeric, None:unknown)}, aggregations={'anything_n_name': ANYTHING(n_name), 
'n_rows': COUNT()}) + JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == SLICE(t1.c_comment, None:unknown, 1:numeric, None:unknown) & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_comment': t1.c_comment, 'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt index 641e01b1c..1e03527af 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt @@ -1,12 +1,11 @@ -ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) - AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == t1.unmask_c_key, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) +ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', sum_n_rows)], orderings=[]) + AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': NDISTINCT(UNMASK::((42 - ([c_key])))), 'sum_n_rows': COUNT()}) + JOIN(condition=t0.b_key 
== t1.b_key & UNMASK::((42 - ([t0.c_key]))) == UNMASK::((42 - ([t1.c_key]))), type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t1.b_key, 'c_key': t1.c_key}) JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) - JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) + JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, 
None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt index 641e01b1c..1e03527af 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt @@ -1,12 +1,11 @@ -ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) - AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == t1.unmask_c_key, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) +ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', sum_n_rows)], orderings=[]) + AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': NDISTINCT(UNMASK::((42 - ([c_key])))), 'sum_n_rows': COUNT()}) + JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == UNMASK::((42 - ([t1.c_key]))), type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t1.b_key, 'c_key': t1.c_key}) JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, 
columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) - JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) + JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 18a067096..712f469bb 100644 --- 
a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,11 +1,11 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) - JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) - AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(True:bool)}) + AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={}) + JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t1.s_name, 'search_id': t1.search_id}) + 
JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index e49975afb..272a45e1c 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) +ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (s_name):asc_first], 
limit=5:numeric) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'n_rows': t1.n_rows, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(n_rows, 0:numeric)):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[l_suppkey], order=[(n_rows):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'l_suppkey': l_suppkey, 'n_rows': n_rows}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) 
SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index c0ef3e327..0cb12afcc 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,14 +1,14 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) - JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.co_id == t1.anything_co_id & t0._id_1 == t1.anything__id_3, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) 
SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id, 'de_purchase_country_id': de_purchase_country_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'anything__id_3': ANYTHING(_id_3), 'anything_co_id': ANYTHING(co_id), 'n_rows': COUNT()}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'in_device_id': t1.in_device_id}) + JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) + JOIN(condition=True:bool, type=INNER, 
cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id, 'de_purchase_country_id': de_purchase_country_id}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index 1bd54185e..96d6186b2 100644 --- a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -1,5 +1,5 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.anything_us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) 
JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) @@ -13,10 +13,10 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) - AGGREGATE(keys={'us_country_id': us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) - JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) - SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'anything_us_country_id': anything_us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'anything_us_country_id': ANYTHING(us_country_id), 'n_rows': COUNT()}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'in_device_id': t1.in_device_id, 'us_country_id': t0.us_country_id}) + JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': 
t1.de_id, 'us_country_id': t0.us_country_id}) + SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 9b738b2d0..0b5063ff5 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,12 +1,11 @@ -ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) +ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) + JOIN(condition=t0.co_id == t1.anything_de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_rows': n_rows, 'sum_n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric)}) - AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 
'n_rows': t1.n_rows}) + AGGREGATE(keys={'anything_de_production_country_id': anything_de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'anything_de_production_country_id': ANYTHING(de_production_country_id), 'n_rows': COUNT()}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'in_device_id': t1.in_device_id}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 52d68c1b0..e033db867 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (s_name):asc_first], limit=10:numeric) - JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': 
t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', n_rows)], orderings=[(n_rows):desc_last, (anything_s_name):asc_first], limit=10:numeric) + AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT()}) + JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t1.anything_l_suppkey, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, columns={'anything_l_suppkey': t0.anything_l_suppkey}) FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_suppkey': anything_l_suppkey, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) diff --git a/tests/test_plan_refsols/tpch_q22.txt 
b/tests/test_plan_refsols/tpch_q22.txt index dbbf1ce32..8648c2c5b 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,12 +1,12 @@ -ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) - AGGREGATE(keys={'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) - JOIN(condition=t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) - AGGREGATE(keys={}, aggregations={'avg_c_acctbal': AVG(c_acctbal)}) - FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - FILTER(condition=ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_anything_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) + AGGREGATE(keys={'cntry_code': SLICE(anything_c_phone, 
None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_anything_c_acctbal': SUM(anything_c_acctbal)}) + FILTER(condition=n_rows == 0:numeric, columns={'anything_c_acctbal': anything_c_acctbal, 'anything_c_phone': anything_c_phone}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_phone': ANYTHING(c_phone), 'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'o_custkey': t1.o_custkey}) + JOIN(condition=t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) + AGGREGATE(keys={}, aggregations={'avg_c_acctbal': AVG(c_acctbal)}) + FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + FILTER(condition=ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_1.txt b/tests/test_plan_refsols/window_filter_order_1.txt index d3401bdb9..faf791703 100644 --- a/tests/test_plan_refsols/window_filter_order_1.txt +++ b/tests/test_plan_refsols/window_filter_order_1.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < 
RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/window_filter_order_2.txt b/tests/test_plan_refsols/window_filter_order_2.txt index d3401bdb9..faf791703 100644 --- a/tests/test_plan_refsols/window_filter_order_2.txt +++ b/tests/test_plan_refsols/window_filter_order_2.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n', n_rows)], orderings=[]) 
AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/window_filter_order_3.txt b/tests/test_plan_refsols/window_filter_order_3.txt index d3401bdb9..faf791703 100644 --- a/tests/test_plan_refsols/window_filter_order_3.txt +++ 
b/tests/test_plan_refsols/window_filter_order_3.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt 
b/tests/test_plan_refsols/window_filter_order_8.txt index 72d47c245..6fb58180b 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ABSENT(n_rows) & c_acctbal < RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ABSENT(n_rows) & anything_c_acctbal < RELSUM(args=[n_rows], partition=[], order=[]), columns={}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'o_custkey': t1.o_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) + 
SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_sql_refsols/correl_1_sqlite.sql b/tests/test_sql_refsols/correl_1_sqlite.sql index 036dcf89c..fe7e918fa 100644 --- a/tests/test_sql_refsols/correl_1_sqlite.sql +++ b/tests/test_sql_refsols/correl_1_sqlite.sql @@ -1,19 +1,18 @@ WITH _s1 AS ( SELECT - SUBSTRING(n_name, 1, 1) AS expr_1, - n_regionkey, - COUNT(*) AS n_rows + n_name, + n_regionkey FROM tpch.nation - GROUP BY - 1, - 2 ) SELECT - region.r_name AS region_name, - COALESCE(_s1.n_rows, 0) AS n_prefix_nations + MAX(region.r_name) AS region_name, + COUNT(*) AS n_prefix_nations FROM tpch.region AS region LEFT JOIN _s1 AS _s1 - ON _s1.expr_1 = SUBSTRING(region.r_name, 1, 1) + ON SUBSTRING(_s1.n_name, 1, 1) = SUBSTRING(region.r_name, 1, 1) AND _s1.n_regionkey = region.r_regionkey +GROUP BY + _s1.n_regionkey, + SUBSTRING(_s1.n_name, 1, 1) ORDER BY 1 diff --git a/tests/test_sql_refsols/correl_2_sqlite.sql b/tests/test_sql_refsols/correl_2_sqlite.sql index 877197ded..3ae3cfb6e 100644 --- a/tests/test_sql_refsols/correl_2_sqlite.sql +++ b/tests/test_sql_refsols/correl_2_sqlite.sql @@ -1,23 +1,22 @@ WITH _s3 AS ( SELECT - SUBSTRING(c_comment, 1, 1) AS expr_1, - c_nationkey, - COUNT(*) AS n_rows + c_comment, + c_nationkey FROM tpch.customer - GROUP BY - 1, - 2 ) SELECT - nation.n_name AS name, - COALESCE(_s3.n_rows, 0) AS n_selected_custs + MAX(nation.n_name) AS name, + COUNT(*) AS n_selected_custs FROM tpch.region AS region JOIN tpch.nation AS nation ON nation.n_regionkey = region.r_regionkey LEFT JOIN _s3 AS _s3 - ON _s3.c_nationkey = nation.n_nationkey - AND _s3.expr_1 = LOWER(SUBSTRING(region.r_name, 1, 1)) + ON LOWER(SUBSTRING(region.r_name, 1, 1)) = SUBSTRING(_s3.c_comment, 1, 1) + AND _s3.c_nationkey = 
nation.n_nationkey WHERE NOT region.r_name LIKE 'A%' +GROUP BY + _s3.c_nationkey, + SUBSTRING(_s3.c_comment, 1, 1) ORDER BY 1 diff --git a/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql index eef2a3632..ad9c80aeb 100644 --- a/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql @@ -10,13 +10,8 @@ WITH _s0 AS ( FROM crbnk.customers ), _s7 AS ( SELECT - ( - 42 - ( - _s3.c_key - ) - ) AS unmask_c_key, _s2.b_key, - COUNT(*) AS n_rows + _s3.c_key FROM _s0 AS _s2 JOIN _s1 AS _s3 ON SUBSTRING( @@ -80,14 +75,11 @@ WITH _s0 AS ( ON _s2.b_key = accounts.a_branchkey AND accounts.a_custkey = ( 42 - _s3.c_key ) - GROUP BY - 1, - 2 ) SELECT - _s0.b_key AS branch_key, - COUNT(*) AS n_local_cust, - COALESCE(SUM(_s7.n_rows), 0) AS n_local_cust_local_acct + _s7.b_key AS branch_key, + COUNT(DISTINCT 42 - _s7.c_key) AS n_local_cust, + COUNT(*) AS n_local_cust_local_acct FROM _s0 AS _s0 JOIN _s1 AS _s1 ON SUBSTRING( @@ -148,8 +140,10 @@ JOIN _s1 AS _s1 END ) LEFT JOIN _s7 AS _s7 - ON _s0.b_key = _s7.b_key AND _s7.unmask_c_key = ( + ON ( 42 - _s1.c_key - ) + ) = ( + 42 - _s7.c_key + ) AND _s0.b_key = _s7.b_key GROUP BY 1 diff --git a/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql index eef2a3632..ad9c80aeb 100644 --- a/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql @@ -10,13 +10,8 @@ WITH _s0 AS ( FROM crbnk.customers ), _s7 AS ( SELECT - ( - 42 - ( - _s3.c_key - ) - ) AS unmask_c_key, _s2.b_key, - COUNT(*) AS n_rows + _s3.c_key FROM _s0 AS _s2 JOIN _s1 AS _s3 ON SUBSTRING( @@ -80,14 +75,11 @@ WITH _s0 AS ( ON _s2.b_key = accounts.a_branchkey AND accounts.a_custkey = ( 42 - _s3.c_key ) - GROUP BY - 1, - 2 ) SELECT - _s0.b_key AS branch_key, - 
COUNT(*) AS n_local_cust, - COALESCE(SUM(_s7.n_rows), 0) AS n_local_cust_local_acct + _s7.b_key AS branch_key, + COUNT(DISTINCT 42 - _s7.c_key) AS n_local_cust, + COUNT(*) AS n_local_cust_local_acct FROM _s0 AS _s0 JOIN _s1 AS _s1 ON SUBSTRING( @@ -148,8 +140,10 @@ JOIN _s1 AS _s1 END ) LEFT JOIN _s7 AS _s7 - ON _s0.b_key = _s7.b_key AND _s7.unmask_c_key = ( + ON ( 42 - _s1.c_key - ) + ) = ( + 42 - _s7.c_key + ) AND _s0.b_key = _s7.b_key GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_ansi.sql b/tests/test_sql_refsols/defog_broker_adv10_ansi.sql index be57d4b91..6496569fe 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_ansi.sql @@ -1,24 +1,22 @@ WITH _s1 AS ( SELECT - EXTRACT(MONTH FROM CAST(sbtxdatetime AS DATETIME)) AS month_sbtxdatetime, - EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) AS year_sbtxdatetime, sbtxcustid, - COUNT(*) AS n_rows + sbtxdatetime FROM main.sbtransaction - GROUP BY - 1, - 2, - 3 ) SELECT - sbcustomer.sbcustid AS _id, - sbcustomer.sbcustname AS name, - COALESCE(_s1.n_rows, 0) AS num_transactions + _s1.sbtxcustid AS _id, + ANY_VALUE(sbcustomer.sbcustname) AS name, + COUNT(*) AS num_transactions FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 - ON _s1.month_sbtxdatetime = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) + ON EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS DATETIME)) = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) + AND EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS DATETIME)) = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) AND _s1.sbtxcustid = sbcustomer.sbcustid - AND _s1.year_sbtxdatetime = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) +GROUP BY + EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS DATETIME)), + EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS DATETIME)), + 1 ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_mysql.sql 
b/tests/test_sql_refsols/defog_broker_adv10_mysql.sql index 7f2e42798..0913b4047 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_mysql.sql @@ -1,24 +1,22 @@ WITH _s1 AS ( SELECT - EXTRACT(MONTH FROM CAST(sbtxdatetime AS DATETIME)) AS month_sbTxDateTime, - EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) AS year_sbTxDateTime, sbtxcustid AS sbTxCustId, - COUNT(*) AS n_rows + sbtxdatetime AS sbTxDateTime FROM main.sbTransaction - GROUP BY - 1, - 2, - 3 ) SELECT - sbCustomer.sbcustid AS _id, - sbCustomer.sbcustname AS name, - COALESCE(_s1.n_rows, 0) AS num_transactions + _s1.sbTxCustId AS _id, + ANY_VALUE(sbCustomer.sbcustname) AS name, + COUNT(*) AS num_transactions FROM main.sbCustomer AS sbCustomer LEFT JOIN _s1 AS _s1 - ON _s1.month_sbTxDateTime = EXTRACT(MONTH FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) + ON EXTRACT(MONTH FROM CAST(_s1.sbTxDateTime AS DATETIME)) = EXTRACT(MONTH FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) + AND EXTRACT(YEAR FROM CAST(_s1.sbTxDateTime AS DATETIME)) = EXTRACT(YEAR FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) AND _s1.sbTxCustId = sbCustomer.sbcustid - AND _s1.year_sbTxDateTime = EXTRACT(YEAR FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) +GROUP BY + EXTRACT(MONTH FROM CAST(_s1.sbTxDateTime AS DATETIME)), + EXTRACT(YEAR FROM CAST(_s1.sbTxDateTime AS DATETIME)), + 1 ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_postgres.sql b/tests/test_sql_refsols/defog_broker_adv10_postgres.sql index da91d43e9..736fa480d 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_postgres.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_postgres.sql @@ -1,24 +1,22 @@ WITH _s1 AS ( SELECT - EXTRACT(MONTH FROM CAST(sbtxdatetime AS TIMESTAMP)) AS month_sbtxdatetime, - EXTRACT(YEAR FROM CAST(sbtxdatetime AS TIMESTAMP)) AS year_sbtxdatetime, sbtxcustid, - COUNT(*) AS n_rows + sbtxdatetime FROM main.sbtransaction - GROUP BY - 1, - 2, - 3 ) 
SELECT - sbcustomer.sbcustid AS _id, - sbcustomer.sbcustname AS name, - COALESCE(_s1.n_rows, 0) AS num_transactions + _s1.sbtxcustid AS _id, + MAX(sbcustomer.sbcustname) AS name, + COUNT(*) AS num_transactions FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 - ON _s1.month_sbtxdatetime = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + ON EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)) = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)) = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) AND _s1.sbtxcustid = sbcustomer.sbcustid - AND _s1.year_sbtxdatetime = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) +GROUP BY + EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)), + EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)), + 1 ORDER BY 3 DESC NULLS LAST LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql index 908f8925f..e23896f7f 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql @@ -1,24 +1,22 @@ WITH _s1 AS ( SELECT - MONTH(CAST(sbtxdatetime AS TIMESTAMP)) AS month_sbtxdatetime, - YEAR(CAST(sbtxdatetime AS TIMESTAMP)) AS year_sbtxdatetime, sbtxcustid, - COUNT(*) AS n_rows + sbtxdatetime FROM main.sbtransaction - GROUP BY - 1, - 2, - 3 ) SELECT - sbcustomer.sbcustid AS _id, - sbcustomer.sbcustname AS name, - COALESCE(_s1.n_rows, 0) AS num_transactions + _s1.sbtxcustid AS _id, + ANY_VALUE(sbcustomer.sbcustname) AS name, + COUNT(*) AS num_transactions FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 - ON _s1.month_sbtxdatetime = MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + ON MONTH(CAST(_s1.sbtxdatetime AS TIMESTAMP)) = MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND YEAR(CAST(_s1.sbtxdatetime AS TIMESTAMP)) = 
YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) AND _s1.sbtxcustid = sbcustomer.sbcustid - AND _s1.year_sbtxdatetime = YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) +GROUP BY + 1, + MONTH(CAST(_s1.sbtxdatetime AS TIMESTAMP)), + YEAR(CAST(_s1.sbtxdatetime AS TIMESTAMP)) ORDER BY 3 DESC NULLS LAST LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql index 3d0abe9d0..0689c9fb0 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql @@ -1,24 +1,22 @@ WITH _s1 AS ( SELECT - CAST(STRFTIME('%m', sbtxdatetime) AS INTEGER) AS month_sbtxdatetime, - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS year_sbtxdatetime, sbtxcustid, - COUNT(*) AS n_rows + sbtxdatetime FROM main.sbtransaction - GROUP BY - 1, - 2, - 3 ) SELECT - sbcustomer.sbcustid AS _id, - sbcustomer.sbcustname AS name, - COALESCE(_s1.n_rows, 0) AS num_transactions + _s1.sbtxcustid AS _id, + MAX(sbcustomer.sbcustname) AS name, + COUNT(*) AS num_transactions FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 - ON _s1.month_sbtxdatetime = CAST(STRFTIME('%m', sbcustomer.sbcustjoindate) AS INTEGER) + ON CAST(STRFTIME('%Y', _s1.sbtxdatetime) AS INTEGER) = CAST(STRFTIME('%Y', sbcustomer.sbcustjoindate) AS INTEGER) + AND CAST(STRFTIME('%m', _s1.sbtxdatetime) AS INTEGER) = CAST(STRFTIME('%m', sbcustomer.sbcustjoindate) AS INTEGER) AND _s1.sbtxcustid = sbcustomer.sbcustid - AND _s1.year_sbtxdatetime = CAST(STRFTIME('%Y', sbcustomer.sbcustjoindate) AS INTEGER) +GROUP BY + CAST(STRFTIME('%Y', _s1.sbtxdatetime) AS INTEGER), + CAST(STRFTIME('%m', _s1.sbtxdatetime) AS INTEGER), + 1 ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql index 0d07f9c46..e09347225 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql +++ 
b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - car_id, - COUNT(*) AS n_rows + car_id FROM main.sales - GROUP BY - 1 ) SELECT - cars.make, - cars.model, - COALESCE(_s1.n_rows, 0) AS num_sales + ANY_VALUE(cars.make) AS make, + ANY_VALUE(cars.model) AS model, + COUNT(*) AS num_sales FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id WHERE LOWER(cars.vin_number) LIKE '%m5%' +GROUP BY + _s1.car_id diff --git a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql index 0d07f9c46..e09347225 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - car_id, - COUNT(*) AS n_rows + car_id FROM main.sales - GROUP BY - 1 ) SELECT - cars.make, - cars.model, - COALESCE(_s1.n_rows, 0) AS num_sales + ANY_VALUE(cars.make) AS make, + ANY_VALUE(cars.model) AS model, + COUNT(*) AS num_sales FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id WHERE LOWER(cars.vin_number) LIKE '%m5%' +GROUP BY + _s1.car_id diff --git a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql index 0d07f9c46..60354e100 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - car_id, - COUNT(*) AS n_rows + car_id FROM main.sales - GROUP BY - 1 ) SELECT - cars.make, - cars.model, - COALESCE(_s1.n_rows, 0) AS num_sales + MAX(cars.make) AS make, + MAX(cars.model) AS model, + COUNT(*) AS num_sales FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id WHERE LOWER(cars.vin_number) LIKE '%m5%' +GROUP BY + _s1.car_id diff --git a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql index 
d6d854f70..3d7738824 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - car_id, - COUNT(*) AS n_rows + car_id FROM main.sales - GROUP BY - 1 ) SELECT - cars.make, - cars.model, - COALESCE(_s1.n_rows, 0) AS num_sales + ANY_VALUE(cars.make) AS make, + ANY_VALUE(cars.model) AS model, + COUNT(*) AS num_sales FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id WHERE CONTAINS(LOWER(cars.vin_number), 'm5') +GROUP BY + _s1.car_id diff --git a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql index 0d07f9c46..60354e100 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - car_id, - COUNT(*) AS n_rows + car_id FROM main.sales - GROUP BY - 1 ) SELECT - cars.make, - cars.model, - COALESCE(_s1.n_rows, 0) AS num_sales + MAX(cars.make) AS make, + MAX(cars.model) AS model, + COUNT(*) AS num_sales FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id WHERE LOWER(cars.vin_number) LIKE '%m5%' +GROUP BY + _s1.car_id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql index 7888680d0..030fc8286 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql @@ -1,25 +1,18 @@ WITH _s1 AS ( SELECT car_id, - COUNT(*) AS n_rows, - SUM(sale_price) AS sum_sale_price + sale_price FROM main.sales WHERE sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), 30, DAY) - GROUP BY - 1 ) SELECT - COALESCE(_s1.n_rows, 0) AS num_sales, - CASE - WHEN ( - NOT _s1.n_rows IS NULL AND _s1.n_rows > 0 - ) - THEN COALESCE(_s1.sum_sale_price, 0) - ELSE NULL - END AS total_revenue + COUNT(*) AS num_sales, + COALESCE(SUM(_s1.sale_price), 0) AS total_revenue 
FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id WHERE LOWER(cars.make) LIKE '%toyota%' +GROUP BY + _s1.car_id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql index cf51a9c54..cb476ee00 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql @@ -1,25 +1,18 @@ WITH _s1 AS ( SELECT car_id, - COUNT(*) AS n_rows, - SUM(sale_price) AS sum_sale_price + sale_price FROM main.sales WHERE sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '30' DAY) - GROUP BY - 1 ) SELECT - COALESCE(_s1.n_rows, 0) AS num_sales, - CASE - WHEN ( - NOT _s1.n_rows IS NULL AND _s1.n_rows > 0 - ) - THEN COALESCE(_s1.sum_sale_price, 0) - ELSE NULL - END AS total_revenue + COUNT(*) AS num_sales, + COALESCE(SUM(_s1.sale_price), 0) AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id WHERE LOWER(cars.make) LIKE '%toyota%' +GROUP BY + _s1.car_id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql index 99650cd1d..0fd50d265 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql @@ -1,25 +1,18 @@ WITH _s1 AS ( SELECT car_id, - COUNT(*) AS n_rows, - SUM(sale_price) AS sum_sale_price + sale_price FROM main.sales WHERE sale_date >= CURRENT_TIMESTAMP - INTERVAL '30 DAY' - GROUP BY - 1 ) SELECT - COALESCE(_s1.n_rows, 0) AS num_sales, - CASE - WHEN ( - NOT _s1.n_rows IS NULL AND _s1.n_rows > 0 - ) - THEN COALESCE(_s1.sum_sale_price, 0) - ELSE NULL - END AS total_revenue + COUNT(*) AS num_sales, + COALESCE(SUM(_s1.sale_price), 0) AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id WHERE LOWER(cars.make) LIKE '%toyota%' +GROUP BY + _s1.car_id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql 
b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql index dcf1f3a92..2b5ff888f 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql @@ -1,25 +1,18 @@ WITH _s1 AS ( SELECT car_id, - COUNT(*) AS n_rows, - SUM(sale_price) AS sum_sale_price + sale_price FROM main.sales WHERE sale_date >= DATEADD(DAY, -30, CURRENT_TIMESTAMP()) - GROUP BY - 1 ) SELECT - COALESCE(_s1.n_rows, 0) AS num_sales, - CASE - WHEN ( - NOT _s1.n_rows IS NULL AND _s1.n_rows > 0 - ) - THEN COALESCE(_s1.sum_sale_price, 0) - ELSE NULL - END AS total_revenue + COUNT(*) AS num_sales, + COALESCE(SUM(_s1.sale_price), 0) AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id WHERE CONTAINS(LOWER(cars.make), 'toyota') +GROUP BY + _s1.car_id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql index 56f43495a..d54948699 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql @@ -1,25 +1,18 @@ WITH _s1 AS ( SELECT car_id, - COUNT(*) AS n_rows, - SUM(sale_price) AS sum_sale_price + sale_price FROM main.sales WHERE sale_date >= DATETIME('now', '-30 day') - GROUP BY - 1 ) SELECT - COALESCE(_s1.n_rows, 0) AS num_sales, - CASE - WHEN ( - NOT _s1.n_rows IS NULL AND _s1.n_rows > 0 - ) - THEN COALESCE(_s1.sum_sale_price, 0) - ELSE NULL - END AS total_revenue + COUNT(*) AS num_sales, + COALESCE(SUM(_s1.sale_price), 0) AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id WHERE LOWER(cars.make) LIKE '%toyota%' +GROUP BY + _s1.car_id diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 95d2f6397..33f6b683a 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -1,22 +1,21 
@@ WITH _s1 AS ( SELECT - receiver_id, - COUNT(*) AS n_rows, - SUM(amount) AS sum_amount + amount, + receiver_id FROM main.wallet_transactions_daily WHERE created_at >= DATE_TRUNC('DAY', DATE_SUB(CURRENT_TIMESTAMP(), 150, DAY)) AND receiver_type = 1 - GROUP BY - 1 ) SELECT - merchants.name AS merchant_name, - COALESCE(_s1.n_rows, 0) AS total_transactions, - COALESCE(_s1.sum_amount, 0) AS total_amount + ANY_VALUE(merchants.name) AS merchant_name, + COUNT(*) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid +GROUP BY + _s1.receiver_id ORDER BY 3 DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql index b2bded8f1..e2b281855 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql @@ -1,22 +1,21 @@ WITH _s1 AS ( SELECT - receiver_id, - COUNT(*) AS n_rows, - SUM(amount) AS sum_amount + amount, + receiver_id FROM main.wallet_transactions_daily WHERE created_at >= CAST(DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '150' DAY) AS DATE) AND receiver_type = 1 - GROUP BY - 1 ) SELECT - merchants.name AS merchant_name, - COALESCE(_s1.n_rows, 0) AS total_transactions, - COALESCE(_s1.sum_amount, 0) AS total_amount + ANY_VALUE(merchants.name) AS merchant_name, + COUNT(*) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid +GROUP BY + _s1.receiver_id ORDER BY 3 DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql index 7663d0ac5..4cd68e90b 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql @@ -1,22 +1,21 @@ WITH _s1 AS ( SELECT - receiver_id, - 
COUNT(*) AS n_rows, - SUM(amount) AS sum_amount + amount, + receiver_id FROM main.wallet_transactions_daily WHERE created_at >= DATE_TRUNC('DAY', CURRENT_TIMESTAMP - INTERVAL '150 DAY') AND receiver_type = 1 - GROUP BY - 1 ) SELECT - merchants.name AS merchant_name, - COALESCE(_s1.n_rows, 0) AS total_transactions, - COALESCE(_s1.sum_amount, 0) AS total_amount + MAX(merchants.name) AS merchant_name, + COUNT(*) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid +GROUP BY + _s1.receiver_id ORDER BY 3 DESC NULLS LAST LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql index abf9ce4b6..4a1f050cb 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql @@ -1,22 +1,21 @@ WITH _s1 AS ( SELECT - receiver_id, - COUNT(*) AS n_rows, - SUM(amount) AS sum_amount + amount, + receiver_id FROM main.wallet_transactions_daily WHERE created_at >= DATE_TRUNC('DAY', DATEADD(DAY, -150, CURRENT_TIMESTAMP())) AND receiver_type = 1 - GROUP BY - 1 ) SELECT - merchants.name AS merchant_name, - COALESCE(_s1.n_rows, 0) AS total_transactions, - COALESCE(_s1.sum_amount, 0) AS total_amount + ANY_VALUE(merchants.name) AS merchant_name, + COUNT(*) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid +GROUP BY + _s1.receiver_id ORDER BY 3 DESC NULLS LAST LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index 6a4fff33f..2b2ccd335 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -1,22 +1,21 @@ WITH _s1 AS ( SELECT - receiver_id, - COUNT(*) AS n_rows, - 
SUM(amount) AS sum_amount + amount, + receiver_id FROM main.wallet_transactions_daily WHERE created_at >= DATE(DATETIME('now', '-150 day'), 'start of day') AND receiver_type = 1 - GROUP BY - 1 ) SELECT - merchants.name AS merchant_name, - COALESCE(_s1.n_rows, 0) AS total_transactions, - COALESCE(_s1.sum_amount, 0) AS total_amount + MAX(merchants.name) AS merchant_name, + COUNT(*) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid +GROUP BY + _s1.receiver_id ORDER BY 3 DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index 28ec4e7cd..9f466815c 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -1,19 +1,19 @@ WITH _s1 AS ( SELECT + amount, coupon_id, - COUNT(txid) AS count_txid, - SUM(amount) AS sum_amount + txid FROM main.wallet_transactions_daily - GROUP BY - 1 ) SELECT - coupons.code AS coupon_code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - COALESCE(_s1.sum_amount, 0) AS total_discount + ANY_VALUE(coupons.code) AS coupon_code, + COUNT(_s1.txid) AS redemption_count, + COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid +GROUP BY + _s1.coupon_id ORDER BY 2 DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql index 28ec4e7cd..9f466815c 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql @@ -1,19 +1,19 @@ WITH _s1 AS ( SELECT + amount, coupon_id, - COUNT(txid) AS count_txid, - SUM(amount) AS sum_amount + txid FROM main.wallet_transactions_daily - GROUP BY - 1 ) SELECT - coupons.code AS coupon_code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - 
COALESCE(_s1.sum_amount, 0) AS total_discount + ANY_VALUE(coupons.code) AS coupon_code, + COUNT(_s1.txid) AS redemption_count, + COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid +GROUP BY + _s1.coupon_id ORDER BY 2 DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql index 9799ac581..f8fd39f72 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql @@ -1,19 +1,19 @@ WITH _s1 AS ( SELECT + amount, coupon_id, - COUNT(txid) AS count_txid, - SUM(amount) AS sum_amount + txid FROM main.wallet_transactions_daily - GROUP BY - 1 ) SELECT - coupons.code AS coupon_code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - COALESCE(_s1.sum_amount, 0) AS total_discount + MAX(coupons.code) AS coupon_code, + COUNT(_s1.txid) AS redemption_count, + COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid +GROUP BY + _s1.coupon_id ORDER BY 2 DESC NULLS LAST LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql index 9799ac581..7004136d7 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql @@ -1,19 +1,19 @@ WITH _s1 AS ( SELECT + amount, coupon_id, - COUNT(txid) AS count_txid, - SUM(amount) AS sum_amount + txid FROM main.wallet_transactions_daily - GROUP BY - 1 ) SELECT - coupons.code AS coupon_code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - COALESCE(_s1.sum_amount, 0) AS total_discount + ANY_VALUE(coupons.code) AS coupon_code, + COUNT(_s1.txid) AS redemption_count, + COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid 
+GROUP BY + _s1.coupon_id ORDER BY 2 DESC NULLS LAST LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index 28ec4e7cd..12fb85faf 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -1,19 +1,19 @@ WITH _s1 AS ( SELECT + amount, coupon_id, - COUNT(txid) AS count_txid, - SUM(amount) AS sum_amount + txid FROM main.wallet_transactions_daily - GROUP BY - 1 ) SELECT - coupons.code AS coupon_code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - COALESCE(_s1.sum_amount, 0) AS total_discount + MAX(coupons.code) AS coupon_code, + COUNT(_s1.txid) AS redemption_count, + COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid +GROUP BY + _s1.coupon_id ORDER BY 2 DESC LIMIT 3 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql index 2b80900d5..fb43a9449 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql @@ -13,8 +13,7 @@ WITH _s0 AS ( ), _s9 AS ( SELECT _s2.s_name, - searches.search_id, - COUNT(*) AS n_rows + searches.search_id FROM _s0 AS _s2 JOIN searches AS searches ON _s2.s_month1 = EXTRACT(MONTH FROM CAST(searches.search_ts AS DATETIME)) @@ -29,16 +28,10 @@ WITH _s0 AS ( OR _s7.s_month2 = EXTRACT(MONTH FROM CAST(_s5.ev_dt AS DATETIME)) OR _s7.s_month3 = EXTRACT(MONTH FROM CAST(_s5.ev_dt AS DATETIME)) ) - GROUP BY - 1, - 2 -), _s16 AS ( - SELECT - _s0.s_name, - COUNT(*) AS n_rows, - SUM(( - NOT _s9.n_rows IS NULL AND _s9.n_rows > 0 - )) AS sum_is_intra_season +), _t1 AS ( + SELECT DISTINCT + _s9.s_name, + _s9.search_id FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(searches.search_ts AS DATETIME)) @@ -46,6 +39,12 @@ WITH _s0 AS ( OR 
_s0.s_month3 = EXTRACT(MONTH FROM CAST(searches.search_ts AS DATETIME)) LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id +), _s16 AS ( + SELECT + s_name, + COUNT(*) AS n_rows, + SUM(TRUE) AS sum_is_intra_season + FROM _t1 GROUP BY 1 ), _s17 AS ( diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql index 4f19d94db..4956e8c5f 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql @@ -13,8 +13,7 @@ WITH _s0 AS ( ), _s9 AS ( SELECT _s2.s_name, - SEARCHES.search_id, - COUNT(*) AS n_rows + SEARCHES.search_id FROM _s0 AS _s2 JOIN SEARCHES AS SEARCHES ON _s2.s_month1 = EXTRACT(MONTH FROM CAST(SEARCHES.search_ts AS DATETIME)) @@ -29,16 +28,10 @@ WITH _s0 AS ( OR _s7.s_month2 = EXTRACT(MONTH FROM CAST(_s5.ev_dt AS DATETIME)) OR _s7.s_month3 = EXTRACT(MONTH FROM CAST(_s5.ev_dt AS DATETIME)) ) - GROUP BY - 1, - 2 -), _s16 AS ( - SELECT - _s0.s_name, - COUNT(*) AS n_rows, - SUM(( - NOT _s9.n_rows IS NULL AND _s9.n_rows > 0 - )) AS sum_is_intra_season +), _t1 AS ( + SELECT DISTINCT + _s9.s_name, + _s9.search_id FROM _s0 AS _s0 JOIN SEARCHES AS SEARCHES ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(SEARCHES.search_ts AS DATETIME)) @@ -46,6 +39,12 @@ WITH _s0 AS ( OR _s0.s_month3 = EXTRACT(MONTH FROM CAST(SEARCHES.search_ts AS DATETIME)) LEFT JOIN _s9 AS _s9 ON SEARCHES.search_id = _s9.search_id AND _s0.s_name = _s9.s_name +), _s16 AS ( + SELECT + s_name, + COUNT(*) AS n_rows, + SUM(TRUE) AS sum_is_intra_season + FROM _t1 GROUP BY 1 ), _s17 AS ( diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql index 1d2372745..1e46c2f36 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql @@ -13,8 +13,7 @@ WITH 
_s0 AS ( ), _s9 AS ( SELECT _s2.s_name, - searches.search_id, - COUNT(*) AS n_rows + searches.search_id FROM _s0 AS _s2 JOIN searches AS searches ON _s2.s_month1 = EXTRACT(MONTH FROM CAST(searches.search_ts AS TIMESTAMP)) @@ -29,16 +28,10 @@ WITH _s0 AS ( OR _s7.s_month2 = EXTRACT(MONTH FROM CAST(_s5.ev_dt AS TIMESTAMP)) OR _s7.s_month3 = EXTRACT(MONTH FROM CAST(_s5.ev_dt AS TIMESTAMP)) ) - GROUP BY - 1, - 2 -), _s16 AS ( - SELECT - _s0.s_name, - COUNT(*) AS n_rows, - SUM(CASE WHEN ( - NOT _s9.n_rows IS NULL AND _s9.n_rows > 0 - ) THEN 1 ELSE 0 END) AS sum_is_intra_season +), _t1 AS ( + SELECT DISTINCT + _s9.s_name, + _s9.search_id FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(searches.search_ts AS TIMESTAMP)) @@ -46,6 +39,12 @@ WITH _s0 AS ( OR _s0.s_month3 = EXTRACT(MONTH FROM CAST(searches.search_ts AS TIMESTAMP)) LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id +), _s16 AS ( + SELECT + s_name, + COUNT(*) AS n_rows, + SUM(1) AS sum_is_intra_season + FROM _t1 GROUP BY 1 ), _s17 AS ( diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql index 0f4672297..b1ca8ede5 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql @@ -13,8 +13,7 @@ WITH _s0 AS ( ), _s9 AS ( SELECT _s2.s_name, - searches.search_id, - COUNT(*) AS n_rows + searches.search_id FROM _s0 AS _s2 JOIN searches AS searches ON _s2.s_month1 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) @@ -29,16 +28,10 @@ WITH _s0 AS ( OR _s7.s_month2 = MONTH(CAST(_s5.ev_dt AS TIMESTAMP)) OR _s7.s_month3 = MONTH(CAST(_s5.ev_dt AS TIMESTAMP)) ) - GROUP BY - 1, - 2 -), _s16 AS ( - SELECT - _s0.s_name, - COUNT(*) AS n_rows, - COUNT_IF(( - NOT _s9.n_rows IS NULL AND _s9.n_rows > 0 - )) AS sum_is_intra_season +), _t1 AS ( + SELECT DISTINCT + _s9.s_name, + _s9.search_id 
FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) @@ -46,6 +39,12 @@ WITH _s0 AS ( OR _s0.s_month3 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id +), _s16 AS ( + SELECT + s_name, + COUNT(*) AS n_rows, + COUNT_IF(TRUE) AS sum_is_intra_season + FROM _t1 GROUP BY 1 ), _s17 AS ( diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql index c2f6d6642..5f8b57232 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql @@ -13,8 +13,7 @@ WITH _s0 AS ( ), _s9 AS ( SELECT _s2.s_name, - searches.search_id, - COUNT(*) AS n_rows + searches.search_id FROM _s0 AS _s2 JOIN searches AS searches ON _s2.s_month1 = CAST(STRFTIME('%m', searches.search_ts) AS INTEGER) @@ -31,16 +30,10 @@ WITH _s0 AS ( OR _s7.s_month2 = CAST(STRFTIME('%m', _s5.ev_dt) AS INTEGER) OR _s7.s_month3 = CAST(STRFTIME('%m', _s5.ev_dt) AS INTEGER) ) - GROUP BY - 1, - 2 -), _s16 AS ( - SELECT - _s0.s_name, - COUNT(*) AS n_rows, - SUM(( - NOT _s9.n_rows IS NULL AND _s9.n_rows > 0 - )) AS sum_is_intra_season +), _t1 AS ( + SELECT DISTINCT + _s9.s_name, + _s9.search_id FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = CAST(STRFTIME('%m', searches.search_ts) AS INTEGER) @@ -48,6 +41,12 @@ WITH _s0 AS ( OR _s0.s_month3 = CAST(STRFTIME('%m', searches.search_ts) AS INTEGER) LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id +), _s16 AS ( + SELECT + s_name, + COUNT(*) AS n_rows, + SUM(TRUE) AS sum_is_intra_season + FROM _t1 GROUP BY 1 ), _s17 AS ( diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql index b0023d3a7..4249f19e3 100644 --- 
a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql @@ -9,17 +9,13 @@ WITH _s0 AS ( FROM main.countries ), _s7 AS ( SELECT - in_device_id, - COUNT(*) AS n_rows + in_device_id FROM main.incidents - GROUP BY - 1 -), _s9 AS ( +), _t1 AS ( SELECT - _s3.co_id AS _id_3, - _s2.co_id, - COUNT(*) AS n_rows, - SUM(_s7.n_rows) AS sum_n_rows + ANY_VALUE(_s3.co_id) AS anything__id_3, + ANY_VALUE(_s2.co_id) AS anything_co_id, + COUNT(*) AS n_rows FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices @@ -27,6 +23,15 @@ WITH _s0 AS ( AND _s3.co_id = devices.de_purchase_country_id LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id + GROUP BY + _s7.in_device_id +), _s9 AS ( + SELECT + anything__id_3, + anything_co_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t1 GROUP BY 1, 2 @@ -38,7 +43,7 @@ SELECT FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 - ON _s0.co_id = _s9.co_id AND _s1.co_id = _s9._id_3 + ON _s0.co_id = _s9.anything_co_id AND _s1.co_id = _s9.anything__id_3 ORDER BY 3 DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql index 575a620ff..d06dcc150 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql @@ -9,17 +9,13 @@ WITH _s0 AS ( FROM main.COUNTRIES ), _s7 AS ( SELECT - in_device_id, - COUNT(*) AS n_rows + in_device_id FROM main.INCIDENTS - GROUP BY - 1 -), _s9 AS ( +), _t1 AS ( SELECT - _s3.co_id AS _id_3, - _s2.co_id, - COUNT(*) AS n_rows, - SUM(_s7.n_rows) AS sum_n_rows + ANY_VALUE(_s3.co_id) AS anything__id_3, + ANY_VALUE(_s2.co_id) AS anything_co_id, + COUNT(*) AS n_rows FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.DEVICES AS DEVICES @@ -27,6 +23,15 @@ WITH _s0 AS ( AND 
DEVICES.de_purchase_country_id = _s3.co_id LEFT JOIN _s7 AS _s7 ON DEVICES.de_id = _s7.in_device_id + GROUP BY + _s7.in_device_id +), _s9 AS ( + SELECT + anything__id_3, + anything_co_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t1 GROUP BY 1, 2 @@ -38,7 +43,7 @@ SELECT FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 - ON _s0.co_id = _s9.co_id AND _s1.co_id = _s9._id_3 + ON _s0.co_id = _s9.anything_co_id AND _s1.co_id = _s9.anything__id_3 ORDER BY 3 DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql index 37e678ff0..903dbeb65 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql @@ -9,17 +9,13 @@ WITH _s0 AS ( FROM main.countries ), _s7 AS ( SELECT - in_device_id, - COUNT(*) AS n_rows + in_device_id FROM main.incidents - GROUP BY - 1 -), _s9 AS ( +), _t1 AS ( SELECT - _s3.co_id AS _id_3, - _s2.co_id, - COUNT(*) AS n_rows, - SUM(_s7.n_rows) AS sum_n_rows + MAX(_s3.co_id) AS anything__id_3, + MAX(_s2.co_id) AS anything_co_id, + COUNT(*) AS n_rows FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices @@ -27,6 +23,15 @@ WITH _s0 AS ( AND _s3.co_id = devices.de_purchase_country_id LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id + GROUP BY + _s7.in_device_id +), _s9 AS ( + SELECT + anything__id_3, + anything_co_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t1 GROUP BY 1, 2 @@ -41,7 +46,7 @@ SELECT FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 - ON _s0.co_id = _s9.co_id AND _s1.co_id = _s9._id_3 + ON _s0.co_id = _s9.anything_co_id AND _s1.co_id = _s9.anything__id_3 ORDER BY 3 DESC NULLS LAST LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql 
b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql index ee15a23c4..26924e69f 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql @@ -9,17 +9,13 @@ WITH _s0 AS ( FROM main.countries ), _s7 AS ( SELECT - in_device_id, - COUNT(*) AS n_rows + in_device_id FROM main.incidents - GROUP BY - 1 -), _s9 AS ( +), _t1 AS ( SELECT - _s3.co_id AS _id_3, - _s2.co_id, - COUNT(*) AS n_rows, - SUM(_s7.n_rows) AS sum_n_rows + ANY_VALUE(_s3.co_id) AS anything__id_3, + ANY_VALUE(_s2.co_id) AS anything_co_id, + COUNT(*) AS n_rows FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices @@ -27,6 +23,15 @@ WITH _s0 AS ( AND _s3.co_id = devices.de_purchase_country_id LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id + GROUP BY + _s7.in_device_id +), _s9 AS ( + SELECT + anything__id_3, + anything_co_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t1 GROUP BY 1, 2 @@ -38,7 +43,7 @@ SELECT FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 - ON _s0.co_id = _s9.co_id AND _s1.co_id = _s9._id_3 + ON _s0.co_id = _s9.anything_co_id AND _s1.co_id = _s9.anything__id_3 ORDER BY 3 DESC NULLS LAST LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql index 37fb038e7..ea40864d6 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql @@ -9,17 +9,13 @@ WITH _s0 AS ( FROM main.countries ), _s7 AS ( SELECT - in_device_id, - COUNT(*) AS n_rows + in_device_id FROM main.incidents - GROUP BY - 1 -), _s9 AS ( +), _t1 AS ( SELECT - _s3.co_id AS _id_3, - _s2.co_id, - COUNT(*) AS n_rows, - SUM(_s7.n_rows) AS sum_n_rows + MAX(_s3.co_id) AS anything__id_3, + MAX(_s2.co_id) AS anything_co_id, + 
COUNT(*) AS n_rows FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices @@ -27,6 +23,15 @@ WITH _s0 AS ( AND _s3.co_id = devices.de_purchase_country_id LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id + GROUP BY + _s7.in_device_id +), _s9 AS ( + SELECT + anything__id_3, + anything_co_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t1 GROUP BY 1, 2 @@ -38,7 +43,7 @@ SELECT FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 - ON _s0.co_id = _s9.co_id AND _s1.co_id = _s9._id_3 + ON _s0.co_id = _s9.anything_co_id AND _s1.co_id = _s9.anything__id_3 ORDER BY 3 DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql index e28700188..331ebd37d 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql @@ -36,23 +36,23 @@ WITH _t2 AS ( ON _s5.in_device_id = devices.de_id GROUP BY 1 -), _s11 AS ( +), _t5 AS ( SELECT - in_device_id, + ANY_VALUE(users.us_country_id) AS anything_us_country_id, COUNT(*) AS n_rows - FROM _t2 - GROUP BY - 1 -), _s13 AS ( - SELECT - users.us_country_id, - COUNT(*) AS n_rows, - SUM(_s11.n_rows) AS sum_n_rows FROM main.users AS users JOIN main.devices AS devices ON devices.de_owner_id = users.us_id - LEFT JOIN _s11 AS _s11 + LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id + GROUP BY + _s11.in_device_id +), _s13 AS ( + SELECT + anything_us_country_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t5 GROUP BY 1 ) @@ -67,6 +67,6 @@ JOIN _s3 AS _s3 JOIN _s7 AS _s7 ON _s7.de_purchase_country_id = countries.co_id LEFT JOIN _s13 AS _s13 - ON _s13.us_country_id = countries.co_id + ON _s13.anything_us_country_id = countries.co_id ORDER BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql 
b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql index c7b65cbda..65d1dcdda 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql @@ -36,23 +36,23 @@ WITH _t2 AS ( ON DEVICES.de_id = _s5.in_device_id GROUP BY 1 -), _s11 AS ( +), _t5 AS ( SELECT - in_device_id, + ANY_VALUE(USERS.us_country_id) AS anything_us_country_id, COUNT(*) AS n_rows - FROM _t2 - GROUP BY - 1 -), _s13 AS ( - SELECT - USERS.us_country_id, - COUNT(*) AS n_rows, - SUM(_s11.n_rows) AS sum_n_rows FROM main.USERS AS USERS JOIN main.DEVICES AS DEVICES ON DEVICES.de_owner_id = USERS.us_id - LEFT JOIN _s11 AS _s11 + LEFT JOIN _t2 AS _s11 ON DEVICES.de_id = _s11.in_device_id + GROUP BY + _s11.in_device_id +), _s13 AS ( + SELECT + anything_us_country_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t5 GROUP BY 1 ) @@ -67,6 +67,6 @@ JOIN _s3 AS _s3 JOIN _s7 AS _s7 ON COUNTRIES.co_id = _s7.de_purchase_country_id LEFT JOIN _s13 AS _s13 - ON COUNTRIES.co_id = _s13.us_country_id + ON COUNTRIES.co_id = _s13.anything_us_country_id ORDER BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql index d885247e9..01b74e04e 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql @@ -36,23 +36,23 @@ WITH _t2 AS ( ON _s5.in_device_id = devices.de_id GROUP BY 1 -), _s11 AS ( +), _t5 AS ( SELECT - in_device_id, + MAX(users.us_country_id) AS anything_us_country_id, COUNT(*) AS n_rows - FROM _t2 - GROUP BY - 1 -), _s13 AS ( - SELECT - users.us_country_id, - COUNT(*) AS n_rows, - SUM(_s11.n_rows) AS sum_n_rows FROM main.users AS users JOIN main.devices AS devices ON devices.de_owner_id = users.us_id - LEFT JOIN _s11 AS _s11 + LEFT 
JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id + GROUP BY + _s11.in_device_id +), _s13 AS ( + SELECT + anything_us_country_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t5 GROUP BY 1 ) @@ -76,6 +76,6 @@ JOIN _s3 AS _s3 JOIN _s7 AS _s7 ON _s7.de_purchase_country_id = countries.co_id LEFT JOIN _s13 AS _s13 - ON _s13.us_country_id = countries.co_id + ON _s13.anything_us_country_id = countries.co_id ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql index 32f0db70a..c51990a88 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql @@ -36,23 +36,23 @@ WITH _t2 AS ( ON _s5.in_device_id = devices.de_id GROUP BY 1 -), _s11 AS ( +), _t5 AS ( SELECT - in_device_id, + ANY_VALUE(users.us_country_id) AS anything_us_country_id, COUNT(*) AS n_rows - FROM _t2 - GROUP BY - 1 -), _s13 AS ( - SELECT - users.us_country_id, - COUNT(*) AS n_rows, - SUM(_s11.n_rows) AS sum_n_rows FROM main.users AS users JOIN main.devices AS devices ON devices.de_owner_id = users.us_id - LEFT JOIN _s11 AS _s11 + LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id + GROUP BY + _s11.in_device_id +), _s13 AS ( + SELECT + anything_us_country_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t5 GROUP BY 1 ) @@ -67,6 +67,6 @@ JOIN _s3 AS _s3 JOIN _s7 AS _s7 ON _s7.de_purchase_country_id = countries.co_id LEFT JOIN _s13 AS _s13 - ON _s13.us_country_id = countries.co_id + ON _s13.anything_us_country_id = countries.co_id ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql index d33b848ea..117f8a414 100644 --- 
a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql @@ -36,23 +36,23 @@ WITH _t2 AS ( ON _s5.in_device_id = devices.de_id GROUP BY 1 -), _s11 AS ( +), _t5 AS ( SELECT - in_device_id, + MAX(users.us_country_id) AS anything_us_country_id, COUNT(*) AS n_rows - FROM _t2 - GROUP BY - 1 -), _s13 AS ( - SELECT - users.us_country_id, - COUNT(*) AS n_rows, - SUM(_s11.n_rows) AS sum_n_rows FROM main.users AS users JOIN main.devices AS devices ON devices.de_owner_id = users.us_id - LEFT JOIN _s11 AS _s11 + LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id + GROUP BY + _s11.in_device_id +), _s13 AS ( + SELECT + anything_us_country_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t5 GROUP BY 1 ) @@ -67,6 +67,6 @@ JOIN _s3 AS _s3 JOIN _s7 AS _s7 ON _s7.de_purchase_country_id = countries.co_id LEFT JOIN _s13 AS _s13 - ON _s13.us_country_id = countries.co_id + ON _s13.anything_us_country_id = countries.co_id ORDER BY 1 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql index e1ae95db5..7d3d9d3fb 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql @@ -1,14 +1,10 @@ WITH _s3 AS ( SELECT - in_device_id, - COUNT(*) AS n_rows + in_device_id FROM main.incidents - GROUP BY - 1 -), _s5 AS ( +), _t1 AS ( SELECT - COALESCE(SUM(_s3.n_rows), 0) AS sum_n_incidents, - devices.de_production_country_id, + ANY_VALUE(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(*) AS n_rows FROM main.devices AS devices JOIN main.products AS products @@ -16,13 +12,21 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON _s3.in_device_id = devices.de_id GROUP BY - 2 + _s3.in_device_id +), _s5 AS ( + SELECT + 
anything_de_production_country_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t1 + GROUP BY + 1 ) SELECT countries.co_name AS country, - ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_n_rows, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 - ON _s5.de_production_country_id = countries.co_id + ON _s5.anything_de_production_country_id = countries.co_id ORDER BY 1 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql index e0eb772d3..2eb2686d3 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql @@ -1,14 +1,10 @@ WITH _s3 AS ( SELECT - in_device_id, - COUNT(*) AS n_rows + in_device_id FROM main.INCIDENTS - GROUP BY - 1 -), _s5 AS ( +), _t1 AS ( SELECT - COALESCE(SUM(_s3.n_rows), 0) AS sum_n_incidents, - DEVICES.de_production_country_id, + ANY_VALUE(DEVICES.de_production_country_id) AS anything_de_production_country_id, COUNT(*) AS n_rows FROM main.DEVICES AS DEVICES JOIN main.PRODUCTS AS PRODUCTS @@ -16,13 +12,21 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON DEVICES.de_id = _s3.in_device_id GROUP BY - 2 + _s3.in_device_id +), _s5 AS ( + SELECT + anything_de_production_country_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t1 + GROUP BY + 1 ) SELECT COUNTRIES.co_name COLLATE utf8mb4_bin AS country, - ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_n_rows, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.COUNTRIES AS COUNTRIES LEFT JOIN _s5 AS _s5 - ON COUNTRIES.co_id = _s5.de_production_country_id + ON COUNTRIES.co_id = _s5.anything_de_production_country_id ORDER BY 1 diff --git 
a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql index 762d308b6..72f3de996 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql @@ -1,14 +1,10 @@ WITH _s3 AS ( SELECT - in_device_id, - COUNT(*) AS n_rows + in_device_id FROM main.incidents - GROUP BY - 1 -), _s5 AS ( +), _t1 AS ( SELECT - COALESCE(SUM(_s3.n_rows), 0) AS sum_n_incidents, - devices.de_production_country_id, + MAX(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(*) AS n_rows FROM main.devices AS devices JOIN main.products AS products @@ -16,16 +12,24 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON _s3.in_device_id = devices.de_id GROUP BY - 2 + _s3.in_device_id +), _s5 AS ( + SELECT + anything_de_production_country_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t1 + GROUP BY + 1 ) SELECT countries.co_name AS country, ROUND( - CAST(CAST(COALESCE(_s5.sum_n_incidents, 0) AS DOUBLE PRECISION) / COALESCE(_s5.n_rows, 0) AS DECIMAL), + CAST(CAST(COALESCE(_s5.sum_n_rows, 0) AS DOUBLE PRECISION) / COALESCE(_s5.n_rows, 0) AS DECIMAL), 2 ) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 - ON _s5.de_production_country_id = countries.co_id + ON _s5.anything_de_production_country_id = countries.co_id ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql index d96e02613..dd7dddaeb 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql @@ -1,14 +1,10 @@ WITH _s3 AS ( SELECT - in_device_id, - COUNT(*) AS n_rows + 
in_device_id FROM main.incidents - GROUP BY - 1 -), _s5 AS ( +), _t1 AS ( SELECT - COALESCE(SUM(_s3.n_rows), 0) AS sum_n_incidents, - devices.de_production_country_id, + ANY_VALUE(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(*) AS n_rows FROM main.devices AS devices JOIN main.products AS products @@ -16,13 +12,21 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON _s3.in_device_id = devices.de_id GROUP BY - 2 + _s3.in_device_id +), _s5 AS ( + SELECT + anything_de_production_country_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows + FROM _t1 + GROUP BY + 1 ) SELECT countries.co_name AS country, - ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_n_rows, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 - ON _s5.de_production_country_id = countries.co_id + ON _s5.anything_de_production_country_id = countries.co_id ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql index ebc28a12e..a5f88e272 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql @@ -1,14 +1,10 @@ WITH _s3 AS ( SELECT - in_device_id, - COUNT(*) AS n_rows + in_device_id FROM main.incidents - GROUP BY - 1 -), _s5 AS ( +), _t1 AS ( SELECT - COALESCE(SUM(_s3.n_rows), 0) AS sum_n_incidents, - devices.de_production_country_id, + MAX(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(*) AS n_rows FROM main.devices AS devices JOIN main.products AS products @@ -16,13 +12,21 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON _s3.in_device_id = devices.de_id GROUP BY - 2 + _s3.in_device_id +), _s5 AS ( + SELECT + anything_de_production_country_id, + COUNT(*) AS n_rows, + SUM(n_rows) AS sum_n_rows 
+ FROM _t1 + GROUP BY + 1 ) SELECT countries.co_name AS country, - ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(CAST(COALESCE(_s5.sum_n_rows, 0) AS REAL) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 - ON _s5.de_production_country_id = countries.co_id + ON _s5.anything_de_production_country_id = countries.co_id ORDER BY 1 diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index d6d5ad4c5..f139a043c 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -38,8 +38,7 @@ WITH _t5 AS ( AND lineitem.l_orderkey = orders.o_orderkey ), _s13 AS ( SELECT - _t3.anything_l_suppkey, - COUNT(*) AS n_rows + _t3.anything_l_suppkey FROM _t3 AS _t3 JOIN _s11 AS _s11 ON _s11.l_linenumber = _t3.l_linenumber @@ -47,17 +46,17 @@ WITH _t5 AS ( AND _s11.o_orderkey = _t3.o_orderkey WHERE _t3.anything_o_orderstatus = 'F' - GROUP BY - 1 ) SELECT - supplier.s_name AS S_NAME, - COALESCE(_s13.n_rows, 0) AS NUMWAIT + ANY_VALUE(supplier.s_name) AS S_NAME, + COUNT(*) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey LEFT JOIN _s13 AS _s13 ON _s13.anything_l_suppkey = supplier.s_suppkey +GROUP BY + _s13.anything_l_suppkey ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/tpch_q21_mysql.sql b/tests/test_sql_refsols/tpch_q21_mysql.sql index d96bf2842..bb319b113 100644 --- a/tests/test_sql_refsols/tpch_q21_mysql.sql +++ b/tests/test_sql_refsols/tpch_q21_mysql.sql @@ -42,8 +42,7 @@ WITH _t5 AS ( 3 ), _s13 AS ( SELECT - _t3.anything_l_suppkey, - COUNT(*) AS n_rows + _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 @@ -51,17 +50,17 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL - GROUP BY - 1 ) SELECT - 
SUPPLIER.s_name COLLATE utf8mb4_bin AS S_NAME, - COALESCE(_s13.n_rows, 0) AS NUMWAIT + ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, + COUNT(*) AS NUMWAIT FROM tpch.SUPPLIER AS SUPPLIER JOIN tpch.NATION AS NATION ON NATION.n_name = 'SAUDI ARABIA' AND NATION.n_nationkey = SUPPLIER.s_nationkey LEFT JOIN _s13 AS _s13 ON SUPPLIER.s_suppkey = _s13.anything_l_suppkey +GROUP BY + _s13.anything_l_suppkey ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/tpch_q21_postgres.sql b/tests/test_sql_refsols/tpch_q21_postgres.sql index a69ae973d..54e706b70 100644 --- a/tests/test_sql_refsols/tpch_q21_postgres.sql +++ b/tests/test_sql_refsols/tpch_q21_postgres.sql @@ -42,8 +42,7 @@ WITH _t5 AS ( 3 ), _s13 AS ( SELECT - _t3.anything_l_suppkey, - COUNT(*) AS n_rows + _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 @@ -51,17 +50,17 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL - GROUP BY - 1 ) SELECT - supplier.s_name AS S_NAME, - COALESCE(_s13.n_rows, 0) AS NUMWAIT + MAX(supplier.s_name) AS S_NAME, + COUNT(*) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey LEFT JOIN _s13 AS _s13 ON _s13.anything_l_suppkey = supplier.s_suppkey +GROUP BY + _s13.anything_l_suppkey ORDER BY 2 DESC NULLS LAST, 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q21_snowflake.sql b/tests/test_sql_refsols/tpch_q21_snowflake.sql index 8ee67cd23..7b4e2eed9 100644 --- a/tests/test_sql_refsols/tpch_q21_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q21_snowflake.sql @@ -42,8 +42,7 @@ WITH _t5 AS ( 3 ), _s13 AS ( SELECT - _t3.anything_l_suppkey, - COUNT(*) AS n_rows + _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 @@ -51,17 +50,17 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL - 
GROUP BY - 1 ) SELECT - supplier.s_name AS S_NAME, - COALESCE(_s13.n_rows, 0) AS NUMWAIT + ANY_VALUE(supplier.s_name) AS S_NAME, + COUNT(*) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey LEFT JOIN _s13 AS _s13 ON _s13.anything_l_suppkey = supplier.s_suppkey +GROUP BY + _s13.anything_l_suppkey ORDER BY 2 DESC NULLS LAST, 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q21_sqlite.sql b/tests/test_sql_refsols/tpch_q21_sqlite.sql index 012796bc0..c8aea7555 100644 --- a/tests/test_sql_refsols/tpch_q21_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q21_sqlite.sql @@ -42,8 +42,7 @@ WITH _t5 AS ( 3 ), _s13 AS ( SELECT - _t3.anything_l_suppkey, - COUNT(*) AS n_rows + _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 @@ -51,17 +50,17 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL - GROUP BY - 1 ) SELECT - supplier.s_name AS S_NAME, - COALESCE(_s13.n_rows, 0) AS NUMWAIT + MAX(supplier.s_name) AS S_NAME, + COUNT(*) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey LEFT JOIN _s13 AS _s13 ON _s13.anything_l_suppkey = supplier.s_suppkey +GROUP BY + _s13.anything_l_suppkey ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/tpch_q22_ansi.sql b/tests/test_sql_refsols/tpch_q22_ansi.sql index 807ca5707..66a4bd6df 100644 --- a/tests/test_sql_refsols/tpch_q22_ansi.sql +++ b/tests/test_sql_refsols/tpch_q22_ansi.sql @@ -7,24 +7,29 @@ WITH _s0 AS ( AND c_acctbal > 0.0 ), _s3 AS ( SELECT - o_custkey, - COUNT(*) AS n_rows + o_custkey FROM tpch.orders +), _t2 AS ( + SELECT + ANY_VALUE(customer.c_acctbal) AS anything_c_acctbal, + ANY_VALUE(customer.c_phone) AS anything_c_phone, + COUNT(*) AS n_rows + FROM _s0 AS _s0 + JOIN tpch.customer AS customer + ON SUBSTRING(customer.c_phone, 1, 
2) IN ('13', '31', '23', '29', '30', '18', '17') + AND _s0.avg_c_acctbal < customer.c_acctbal + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey GROUP BY - 1 + _s3.o_custkey ) SELECT - SUBSTRING(customer.c_phone, 1, 2) AS CNTRY_CODE, + SUBSTRING(anything_c_phone, 1, 2) AS CNTRY_CODE, COUNT(*) AS NUM_CUSTS, - COALESCE(SUM(customer.c_acctbal), 0) AS TOTACCTBAL -FROM _s0 AS _s0 -JOIN tpch.customer AS customer - ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') - AND _s0.avg_c_acctbal < customer.c_acctbal -LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey + COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL +FROM _t2 WHERE - _s3.n_rows = 0 OR _s3.n_rows IS NULL + n_rows = 0 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_mysql.sql b/tests/test_sql_refsols/tpch_q22_mysql.sql index 2f44d9b27..bcba0ba2f 100644 --- a/tests/test_sql_refsols/tpch_q22_mysql.sql +++ b/tests/test_sql_refsols/tpch_q22_mysql.sql @@ -7,24 +7,29 @@ WITH _s0 AS ( AND SUBSTRING(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') ), _s3 AS ( SELECT - o_custkey, - COUNT(*) AS n_rows + o_custkey FROM tpch.ORDERS +), _t2 AS ( + SELECT + ANY_VALUE(CUSTOMER.c_acctbal) AS anything_c_acctbal, + ANY_VALUE(CUSTOMER.c_phone) AS anything_c_phone, + COUNT(*) AS n_rows + FROM _s0 AS _s0 + JOIN tpch.CUSTOMER AS CUSTOMER + ON CUSTOMER.c_acctbal > _s0.avg_c_acctbal + AND SUBSTRING(CUSTOMER.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + LEFT JOIN _s3 AS _s3 + ON CUSTOMER.c_custkey = _s3.o_custkey GROUP BY - 1 + _s3.o_custkey ) SELECT - SUBSTRING(CUSTOMER.c_phone, 1, 2) COLLATE utf8mb4_bin AS CNTRY_CODE, + SUBSTRING(anything_c_phone, 1, 2) COLLATE utf8mb4_bin AS CNTRY_CODE, COUNT(*) AS NUM_CUSTS, - COALESCE(SUM(CUSTOMER.c_acctbal), 0) AS TOTACCTBAL -FROM _s0 AS _s0 -JOIN tpch.CUSTOMER AS CUSTOMER - ON CUSTOMER.c_acctbal > _s0.avg_c_acctbal - AND SUBSTRING(CUSTOMER.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', 
'17') -LEFT JOIN _s3 AS _s3 - ON CUSTOMER.c_custkey = _s3.o_custkey + COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL +FROM _t2 WHERE - _s3.n_rows = 0 OR _s3.n_rows IS NULL + n_rows = 0 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_postgres.sql b/tests/test_sql_refsols/tpch_q22_postgres.sql index b881feea5..eb1c7ae9c 100644 --- a/tests/test_sql_refsols/tpch_q22_postgres.sql +++ b/tests/test_sql_refsols/tpch_q22_postgres.sql @@ -7,24 +7,29 @@ WITH _s0 AS ( AND c_acctbal > 0.0 ), _s3 AS ( SELECT - o_custkey, - COUNT(*) AS n_rows + o_custkey FROM tpch.orders +), _t2 AS ( + SELECT + MAX(customer.c_acctbal) AS anything_c_acctbal, + MAX(customer.c_phone) AS anything_c_phone, + COUNT(*) AS n_rows + FROM _s0 AS _s0 + JOIN tpch.customer AS customer + ON SUBSTRING(customer.c_phone FROM 1 FOR 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND _s0.avg_c_acctbal < customer.c_acctbal + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey GROUP BY - 1 + _s3.o_custkey ) SELECT - SUBSTRING(customer.c_phone FROM 1 FOR 2) AS CNTRY_CODE, + SUBSTRING(anything_c_phone FROM 1 FOR 2) AS CNTRY_CODE, COUNT(*) AS NUM_CUSTS, - COALESCE(SUM(customer.c_acctbal), 0) AS TOTACCTBAL -FROM _s0 AS _s0 -JOIN tpch.customer AS customer - ON SUBSTRING(customer.c_phone FROM 1 FOR 2) IN ('13', '31', '23', '29', '30', '18', '17') - AND _s0.avg_c_acctbal < customer.c_acctbal -LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey + COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL +FROM _t2 WHERE - _s3.n_rows = 0 OR _s3.n_rows IS NULL + n_rows = 0 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_snowflake.sql b/tests/test_sql_refsols/tpch_q22_snowflake.sql index 96ff00430..ccde6e030 100644 --- a/tests/test_sql_refsols/tpch_q22_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q22_snowflake.sql @@ -7,24 +7,29 @@ WITH _s0 AS ( AND c_acctbal > 0.0 ), _s3 AS ( SELECT - o_custkey, - COUNT(*) AS n_rows + o_custkey FROM tpch.orders +), _t2 AS ( + SELECT + 
ANY_VALUE(customer.c_acctbal) AS anything_c_acctbal, + ANY_VALUE(customer.c_phone) AS anything_c_phone, + COUNT(*) AS n_rows + FROM _s0 AS _s0 + JOIN tpch.customer AS customer + ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND _s0.avg_c_acctbal < customer.c_acctbal + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey GROUP BY - 1 + _s3.o_custkey ) SELECT - SUBSTRING(customer.c_phone, 1, 2) AS CNTRY_CODE, + SUBSTRING(anything_c_phone, 1, 2) AS CNTRY_CODE, COUNT(*) AS NUM_CUSTS, - COALESCE(SUM(customer.c_acctbal), 0) AS TOTACCTBAL -FROM _s0 AS _s0 -JOIN tpch.customer AS customer - ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') - AND _s0.avg_c_acctbal < customer.c_acctbal -LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey + COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL +FROM _t2 WHERE - _s3.n_rows = 0 OR _s3.n_rows IS NULL + n_rows = 0 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_sqlite.sql b/tests/test_sql_refsols/tpch_q22_sqlite.sql index 807ca5707..f422529b1 100644 --- a/tests/test_sql_refsols/tpch_q22_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q22_sqlite.sql @@ -7,24 +7,29 @@ WITH _s0 AS ( AND c_acctbal > 0.0 ), _s3 AS ( SELECT - o_custkey, - COUNT(*) AS n_rows + o_custkey FROM tpch.orders +), _t2 AS ( + SELECT + MAX(customer.c_acctbal) AS anything_c_acctbal, + MAX(customer.c_phone) AS anything_c_phone, + COUNT(*) AS n_rows + FROM _s0 AS _s0 + JOIN tpch.customer AS customer + ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND _s0.avg_c_acctbal < customer.c_acctbal + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey GROUP BY - 1 + _s3.o_custkey ) SELECT - SUBSTRING(customer.c_phone, 1, 2) AS CNTRY_CODE, + SUBSTRING(anything_c_phone, 1, 2) AS CNTRY_CODE, COUNT(*) AS NUM_CUSTS, - COALESCE(SUM(customer.c_acctbal), 0) AS TOTACCTBAL -FROM _s0 AS _s0 -JOIN tpch.customer AS customer - ON 
SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') - AND _s0.avg_c_acctbal < customer.c_acctbal -LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey + COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL +FROM _t2 WHERE - _s3.n_rows = 0 OR _s3.n_rows IS NULL + n_rows = 0 GROUP BY 1 ORDER BY From 473cc2406349817cae0b908d0db92a834862c7b8 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 21 Oct 2025 13:59:14 -0400 Subject: [PATCH 112/143] Identified bug in left join case: need to aggregate on LHS key --- demos/notebooks/4_tpch.ipynb | 2 +- .../conversion/join_aggregate_transpose.py | 34 ++++- tests/test_plan_refsols/common_prefix_ag.txt | 72 ++++++----- tests/test_plan_refsols/common_prefix_ah.txt | 2 +- tests/test_plan_refsols/common_prefix_ai.txt | 60 ++++----- tests/test_plan_refsols/common_prefix_aj.txt | 72 ++++++----- tests/test_plan_refsols/common_prefix_ak.txt | 56 ++++---- tests/test_plan_refsols/common_prefix_an.txt | 21 +-- tests/test_plan_refsols/common_prefix_ao.txt | 6 +- tests/test_plan_refsols/common_prefix_u.txt | 2 +- tests/test_plan_refsols/common_prefix_x.txt | 2 +- tests/test_plan_refsols/common_prefix_y.txt | 15 ++- tests/test_plan_refsols/correl_1.txt | 2 +- tests/test_plan_refsols/correl_2.txt | 2 +- .../cryptbank_general_join_01_raw.txt | 17 +-- .../cryptbank_general_join_01_rewrite.txt | 17 +-- .../epoch_intra_season_searches.txt | 4 +- tests/test_plan_refsols/singular7.txt | 23 ++-- ...chnograph_country_combination_analysis.txt | 2 +- ...nograph_country_incident_rate_analysis.txt | 2 +- ..._error_rate_sun_set_by_factory_country.txt | 6 +- tests/test_plan_refsols/tpch_q21.txt | 2 +- tests/test_plan_refsols/tpch_q22.txt | 2 +- .../window_filter_order_1.txt | 2 +- .../window_filter_order_2.txt | 2 +- .../window_filter_order_3.txt | 2 +- .../window_filter_order_8.txt | 2 +- tests/test_sql_refsols/correl_1_sqlite.sql | 24 ++-- tests/test_sql_refsols/correl_2_sqlite.sql | 32 +++-- 
.../cryptbank_general_join_01_raw_sqlite.sql | 122 ++++++++++-------- ...yptbank_general_join_01_rewrite_sqlite.sql | 122 ++++++++++-------- .../defog_broker_adv10_ansi.sql | 30 +++-- .../defog_broker_adv10_mysql.sql | 30 +++-- .../defog_broker_adv10_postgres.sql | 30 +++-- .../defog_broker_adv10_snowflake.sql | 30 +++-- .../defog_broker_adv10_sqlite.sql | 30 +++-- .../defog_dealership_adv3_ansi.sql | 27 ++-- .../defog_dealership_adv3_mysql.sql | 27 ++-- .../defog_dealership_adv3_postgres.sql | 27 ++-- .../defog_dealership_adv3_snowflake.sql | 27 ++-- .../defog_dealership_adv3_sqlite.sql | 27 ++-- .../defog_dealership_adv4_ansi.sql | 30 +++-- .../defog_dealership_adv4_mysql.sql | 30 +++-- .../defog_dealership_adv4_postgres.sql | 30 +++-- .../defog_dealership_adv4_snowflake.sql | 30 +++-- .../defog_dealership_adv4_sqlite.sql | 30 +++-- .../defog_ewallet_basic10_ansi.sql | 23 ++-- .../defog_ewallet_basic10_mysql.sql | 23 ++-- .../defog_ewallet_basic10_postgres.sql | 23 ++-- .../defog_ewallet_basic10_snowflake.sql | 23 ++-- .../defog_ewallet_basic10_sqlite.sql | 23 ++-- .../epoch_intra_season_searches_ansi.sql | 12 +- .../epoch_intra_season_searches_mysql.sql | 12 +- .../epoch_intra_season_searches_postgres.sql | 18 ++- .../epoch_intra_season_searches_snowflake.sql | 12 +- .../epoch_intra_season_searches_sqlite.sql | 12 +- ...raph_country_combination_analysis_ansi.sql | 5 +- ...aph_country_combination_analysis_mysql.sql | 5 +- ..._country_combination_analysis_postgres.sql | 5 +- ...country_combination_analysis_snowflake.sql | 5 +- ...ph_country_combination_analysis_sqlite.sql | 5 +- ...ph_country_incident_rate_analysis_ansi.sql | 5 +- ...h_country_incident_rate_analysis_mysql.sql | 5 +- ...ountry_incident_rate_analysis_postgres.sql | 5 +- ...untry_incident_rate_analysis_snowflake.sql | 5 +- ..._country_incident_rate_analysis_sqlite.sql | 5 +- ...r_rate_sun_set_by_factory_country_ansi.sql | 7 +- ..._rate_sun_set_by_factory_country_mysql.sql | 7 +- 
...te_sun_set_by_factory_country_postgres.sql | 7 +- ...e_sun_set_by_factory_country_snowflake.sql | 7 +- ...rate_sun_set_by_factory_country_sqlite.sql | 7 +- tests/test_sql_refsols/tpch_q21_ansi.sql | 24 ++-- tests/test_sql_refsols/tpch_q21_mysql.sql | 24 ++-- tests/test_sql_refsols/tpch_q21_postgres.sql | 24 ++-- tests/test_sql_refsols/tpch_q21_snowflake.sql | 24 ++-- tests/test_sql_refsols/tpch_q21_sqlite.sql | 24 ++-- tests/test_sql_refsols/tpch_q22_ansi.sql | 7 +- tests/test_sql_refsols/tpch_q22_mysql.sql | 7 +- tests/test_sql_refsols/tpch_q22_postgres.sql | 7 +- tests/test_sql_refsols/tpch_q22_snowflake.sql | 7 +- tests/test_sql_refsols/tpch_q22_sqlite.sql | 7 +- 81 files changed, 960 insertions(+), 627 deletions(-) diff --git a/demos/notebooks/4_tpch.ipynb b/demos/notebooks/4_tpch.ipynb index bc08f1602..78c46b90a 100644 --- a/demos/notebooks/4_tpch.ipynb +++ b/demos/notebooks/4_tpch.ipynb @@ -1695,7 +1695,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "pydough", "language": "python", "name": "python3" }, diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index d315a86e4..b7ee2d08a 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -25,6 +25,7 @@ apply_substitution, extract_equijoin_keys, ) +from pydough.types import NumericType class JoinAggregateTransposeShuttle(RelationalShuttle): @@ -110,6 +111,8 @@ def join_aggregate_transpose( The new RelationalNode tree with the Join and Aggregate transposed, or None if the transpose is not possible. """ + join_name: str + agg_name: str # The cardinality with regards to the input being considered must be # singular (unless the aggregations allow plural), and must be @@ -165,6 +168,32 @@ def join_aggregate_transpose( ): return None + # Extract the join key references from both sides of the join in the + # order they appear in the join condition. 
+ agg_key_refs, non_agg_key_refs = extract_equijoin_keys(join) + if not is_left: + agg_key_refs, non_agg_key_refs = non_agg_key_refs, agg_key_refs + + # Now that the transpose is deemed possible, if in the left join + # scenario, transform any `COUNT(*)` calls into `COUNT(col)`, where + # `col` is one of the aggregation keys. If this is not possible, then + # abort. + if left_join_case and any( + agg.op == pydop.COUNT and len(agg.inputs) == 0 + for agg in aggregate.aggregations.values() + ): + if len(agg_key_refs) == 0: + return None + key_expr: RelationalExpression = aggregate.keys[agg_key_refs[0].name] + new_call: CallExpression = CallExpression( + pydop.COUNT, + NumericType(), + [key_expr], + ) + for agg_name, agg in aggregate.aggregations.items(): + if agg.op == pydop.COUNT and len(agg.inputs) == 0: + aggregate.aggregations[agg_name] = new_call + # TODO ADD COMMENTS agg_alias: str | None = ( join.default_input_aliases[0] if is_left else join.default_input_aliases[1] @@ -207,8 +236,6 @@ def join_aggregate_transpose( # into the join's columns so that the aggregate keys/aggregations can # refer to them with the same names, without any renaming caused by # conflicts. - join_name: str - agg_name: str for col_name, col_expr in agg_input.columns.items(): join_name = self.generate_name(col_name, new_join_columns) new_join_columns[join_name] = add_input_name(col_expr, agg_alias) @@ -241,9 +268,6 @@ def join_aggregate_transpose( # For each join key from the non-aggregate side, alter its substitution # to map it to the corresponding key from the aggregate side. 
- agg_key_refs, non_agg_key_refs = extract_equijoin_keys(join) - if not is_left: - agg_key_refs, non_agg_key_refs = non_agg_key_refs, agg_key_refs for agg_key, non_agg_key in zip(agg_key_refs, non_agg_key_refs): join_sub[non_agg_key] = join_sub[agg_key] diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index 7874aaef6..049eef756 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_n_rows': COUNT(), 'sum_sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 
'ps_supplycost': t1.ps_supplycost}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(c_custkey), 1:numeric, 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,33 +10,35 @@ ROOT(columns=[('nation_name', 
anything_anything_n_name), ('n_machine_cust', ndis SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': 
o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - 
SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows * IFF(PRESENT(o_orderkey), 1:numeric, 0:numeric), 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': 
o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey 
== t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': 
r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index b510473c6..56d52db6b 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], 
orderings=[(anything_anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(o_orderkey), 1:numeric, 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index ff8d344eb..c1c1d47cd 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,29 +1,31 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', ndistinct_c_custkey), 
('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_n_rows': COUNT(), 'sum_sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': 
t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': 
c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(c_custkey), 1:numeric, 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey': 
c_custkey, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index 632a4a221..c2ae42100 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_n_rows': COUNT(), 'sum_sum_n_rows': SUM(n_rows), 
'sum_sum_revenue': SUM(sum_revenue)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(c_custkey), 1:numeric, 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': 
t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,33 +10,35 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndis SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': 
n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': 
t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows * IFF(PRESENT(o_orderkey), 1:numeric, 0:numeric), 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + 
FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + 
SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index a480a8f39..2a839c988 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows)], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_n_rows': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows)], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, 
aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(c_custkey), 1:numeric, 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,31 +10,33 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndis SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': 
t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows * IFF(PRESENT(o_orderkey), 1:numeric, 0:numeric)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 
'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + 
JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 4f7765ee4..4873ae241 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('cust_key', anything_o_custkey), ('n_orders', n_rows), ('n_no_tax_discount', anything_n_rows_0)], orderings=[(anything_o_custkey):asc_first]) - 
FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & n_rows > RELAVG(args=[n_rows], partition=[anything_c_nationkey], order=[]) & sum_n_rows > 0:numeric, columns={'anything_n_rows_0': anything_n_rows_0, 'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) +ROOT(columns=[('cust_key', anything_o_custkey), ('n_orders', n_rows * IFF(PRESENT(anything_o_custkey), 1:numeric, 0:numeric)), ('n_no_tax_discount', anything_n_rows_0)], orderings=[(anything_o_custkey):asc_first]) + FILTER(condition=n_rows * IFF(PRESENT(anything_o_custkey), 1:numeric, 0:numeric) > RELAVG(args=[n_rows * IFF(PRESENT(anything_o_custkey), 1:numeric, 0:numeric)], partition=[anything_c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'anything_n_rows_0': anything_n_rows_0, 'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'anything_c_nationkey': ANYTHING(c_nationkey), 'anything_n_rows_0': ANYTHING(n_rows_0), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'n_rows_0': t0.n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) @@ -10,11 +10,12 @@ ROOT(columns=[('cust_key', anything_o_custkey), ('n_orders', n_rows), ('n_no_tax SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) 
- JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows * IFF(PRESENT(l_orderkey), 1:numeric, 0:numeric)}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt 
b/tests/test_plan_refsols/common_prefix_ao.txt index 9b07af230..76d840838 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('cust_key', o_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, 0:numeric)), ('n_no_tax_discount', n_rows), ('n_part_purchases', anything_sum_n_rows)], orderings=[(o_custkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(anything_n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(anything_n_rows, 0:numeric)], partition=[], order=[]), columns={'anything_n_rows': anything_n_rows, 'anything_sum_n_rows': anything_sum_n_rows, 'n_rows': n_rows, 'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', o_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, 0:numeric)), ('n_no_tax_discount', n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)), ('n_part_purchases', anything_sum_n_rows)], orderings=[(o_custkey):asc_first], limit=5:numeric) + FILTER(condition=n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) > 0:numeric & DEFAULT_TO(anything_n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(anything_n_rows, 0:numeric)], partition=[], order=[]), columns={'anything_n_rows': anything_n_rows, 'anything_sum_n_rows': anything_sum_n_rows, 'n_rows': n_rows, 'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_n_rows': ANYTHING(n_rows), 'anything_sum_n_rows': ANYTHING(sum_n_rows), 'n_rows': COUNT()}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey, 'sum_n_rows': t0.sum_n_rows}) LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) @@ -11,7 +11,7 @@ ROOT(columns=[('cust_key', o_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) 
SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(l_orderkey), 1:numeric, 0:numeric))}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index 4a94a7d76..096c535f5 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -6,7 +6,7 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:n FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'sum_n_rows': SUM(n_rows * IFF(PRESENT(l_orderkey), 1:numeric, 0:numeric)), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT(), 
'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_quantity': t1.l_quantity, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index aedddfbd4..cdd9874a0 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -2,7 +2,7 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_ JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(l_orderkey), 1:numeric, 0:numeric))}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index 2cf917080..988bda8ed 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,10 +1,11 @@ 
-ROOT(columns=[('name', anything_c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (anything_c_name):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'anything_c_name': anything_c_name, 'n_rows': n_rows}) +ROOT(columns=[('name', anything_c_name), ('n_orders', n_rows * IFF(PRESENT(anything_o_custkey), 1:numeric, 0:numeric))], orderings=[(n_rows * IFF(PRESENT(anything_o_custkey), 1:numeric, 0:numeric)):desc_last, (anything_c_name):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'anything_c_name': anything_c_name, 'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) + PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows * IFF(PRESENT(l_orderkey), 1:numeric, 0:numeric)}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey 
== t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index a9637676f..b7c906649 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('region_name', anything_r_name), ('n_prefix_nations', n_rows)], orderings=[(anything_r_name):asc_first]) +ROOT(columns=[('region_name', anything_r_name), ('n_prefix_nations', n_rows * IFF(PRESENT(n_regionkey), 1:numeric, 0:numeric))], orderings=[(anything_r_name):asc_first]) AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'anything_r_name': ANYTHING(r_name), 'n_rows': COUNT()}) JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_2.txt b/tests/test_plan_refsols/correl_2.txt index 74b9a55e0..46082d9d5 100644 --- a/tests/test_plan_refsols/correl_2.txt +++ b/tests/test_plan_refsols/correl_2.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', anything_n_name), ('n_selected_custs', n_rows)], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('name', anything_n_name), ('n_selected_custs', n_rows * IFF(PRESENT(c_nationkey), 1:numeric, 0:numeric))], 
orderings=[(anything_n_name):asc_first]) AGGREGATE(keys={'c_nationkey': c_nationkey, 'expr_1': SLICE(c_comment, None:unknown, 1:numeric, None:unknown)}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT()}) JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == SLICE(t1.c_comment, None:unknown, 1:numeric, None:unknown) & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_comment': t1.c_comment, 'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt index 1e03527af..790b89a6e 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt @@ -1,11 +1,12 @@ ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', sum_n_rows)], orderings=[]) - AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': NDISTINCT(UNMASK::((42 - ([c_key])))), 'sum_n_rows': COUNT()}) - JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == UNMASK::((42 - ([t1.c_key]))), type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t1.b_key, 'c_key': t1.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, 
columns={'c_addr': c_addr, 'c_key': c_key}) - JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(b_key), 1:numeric, 0:numeric))}) + AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == UNMASK::((42 - ([t1.c_key]))), type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t1.b_key, 'c_key': t1.c_key}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) + JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || 
SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt index 1e03527af..790b89a6e 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt @@ -1,11 +1,12 @@ ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', sum_n_rows)], orderings=[]) - AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': NDISTINCT(UNMASK::((42 - ([c_key])))), 'sum_n_rows': COUNT()}) - JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == UNMASK::((42 - ([t1.c_key]))), type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t1.b_key, 'c_key': t1.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, 
-6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(b_key), 1:numeric, 0:numeric))}) + AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == UNMASK::((42 - ([t1.c_key]))), type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t1.b_key, 'c_key': t1.c_key}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) + JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) + SCAN(table=CRBNK.CUSTOMERS, 
columns={'c_addr': c_addr, 'c_key': c_key}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 712f469bb..0326f9228 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,7 +1,7 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(True:bool)}) - AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(n_rows * IFF(PRESENT(search_id), 1:numeric, 0:numeric) > 0:numeric)}) + AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t1.s_name, 'search_id': t1.search_id}) JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 
's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 272a45e1c..2a8715dbf 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,13 +1,14 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (s_name):asc_first], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'n_rows': t1.n_rows, 's_name': t0.s_name}) +ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first], limit=5:numeric) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'n_orders': t1.n_orders, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[l_suppkey], order=[(n_rows):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'l_suppkey': l_suppkey, 'n_rows': n_rows}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 
'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'anything_p_name': anything_p_name, 'l_suppkey': l_suppkey, 'n_orders': n_rows * IFF(PRESENT(l_suppkey), 1:numeric, 0:numeric)}) + FILTER(condition=RANKING(args=[], partition=[l_suppkey], order=[(n_rows * IFF(PRESENT(l_suppkey), 1:numeric, 0:numeric)):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'l_suppkey': l_suppkey, 'n_rows': n_rows}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, 
columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index 0cb12afcc..3e2f698b4 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -3,7 +3,7 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(in_device_id), 1:numeric, 0:numeric))}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'anything__id_3': ANYTHING(_id_3), 'anything_co_id': ANYTHING(co_id), 'n_rows': COUNT()}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'in_device_id': t1.in_device_id}) JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index 96d6186b2..b883522be 100644 --- 
a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -13,7 +13,7 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) - AGGREGATE(keys={'anything_us_country_id': anything_us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'anything_us_country_id': anything_us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(in_device_id), 1:numeric, 0:numeric))}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'anything_us_country_id': ANYTHING(us_country_id), 'n_rows': COUNT()}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'in_device_id': t1.in_device_id, 'us_country_id': t0.us_country_id}) JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 0b5063ff5..4d5557ea3 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.anything_de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) +ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) + JOIN(condition=t0.co_id == t1.anything_de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'anything_de_production_country_id': anything_de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'anything_de_production_country_id': anything_de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows * IFF(PRESENT(in_device_id), 1:numeric, 0:numeric))}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'anything_de_production_country_id': ANYTHING(de_production_country_id), 'n_rows': COUNT()}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'in_device_id': t1.in_device_id}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index e033db867..dd2e4a7e7 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', n_rows)], orderings=[(n_rows):desc_last, (anything_s_name):asc_first], limit=10:numeric) +ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', n_rows * 
IFF(PRESENT(anything_l_suppkey), 1:numeric, 0:numeric))], orderings=[(n_rows * IFF(PRESENT(anything_l_suppkey), 1:numeric, 0:numeric)):desc_last, (anything_s_name):asc_first], limit=10:numeric) AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT()}) JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t1.anything_l_suppkey, 's_name': t0.s_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 8648c2c5b..278a1453a 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,6 +1,6 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_anything_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) AGGREGATE(keys={'cntry_code': SLICE(anything_c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_anything_c_acctbal': SUM(anything_c_acctbal)}) - FILTER(condition=n_rows == 0:numeric, columns={'anything_c_acctbal': anything_c_acctbal, 'anything_c_phone': anything_c_phone}) + FILTER(condition=n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) == 0:numeric, columns={'anything_c_acctbal': anything_c_acctbal, 'anything_c_phone': anything_c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_phone': ANYTHING(c_phone), 'n_rows': COUNT()}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'o_custkey': t1.o_custkey}) JOIN(condition=t1.c_acctbal > 
t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) diff --git a/tests/test_plan_refsols/window_filter_order_1.txt b/tests/test_plan_refsols/window_filter_order_1.txt index faf791703..92480d257 100644 --- a/tests/test_plan_refsols/window_filter_order_1.txt +++ b/tests/test_plan_refsols/window_filter_order_1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) + FILTER(condition=n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) < RELAVG(args=[n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)], partition=[], order=[]) & n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) > 0:numeric, columns={}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_2.txt b/tests/test_plan_refsols/window_filter_order_2.txt index faf791703..92480d257 100644 --- a/tests/test_plan_refsols/window_filter_order_2.txt +++ b/tests/test_plan_refsols/window_filter_order_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) + FILTER(condition=n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) < RELAVG(args=[n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)], partition=[], order=[]) & n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) > 0:numeric, 
columns={}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_3.txt b/tests/test_plan_refsols/window_filter_order_3.txt index faf791703..92480d257 100644 --- a/tests/test_plan_refsols/window_filter_order_3.txt +++ b/tests/test_plan_refsols/window_filter_order_3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) + FILTER(condition=n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) < RELAVG(args=[n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)], partition=[], order=[]) & n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) > 0:numeric, columns={}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt index 6fb58180b..9ea60e8f5 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ABSENT(n_rows) & anything_c_acctbal < RELSUM(args=[n_rows], partition=[], order=[]), 
columns={}) + FILTER(condition=ABSENT(n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)) & anything_c_acctbal < RELSUM(args=[n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)], partition=[], order=[]), columns={}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'n_rows': COUNT()}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) diff --git a/tests/test_sql_refsols/correl_1_sqlite.sql b/tests/test_sql_refsols/correl_1_sqlite.sql index fe7e918fa..3491077be 100644 --- a/tests/test_sql_refsols/correl_1_sqlite.sql +++ b/tests/test_sql_refsols/correl_1_sqlite.sql @@ -3,16 +3,22 @@ WITH _s1 AS ( n_name, n_regionkey FROM tpch.nation +), _t0 AS ( + SELECT + _s1.n_regionkey, + MAX(region.r_name) AS anything_r_name, + COUNT(*) AS n_rows + FROM tpch.region AS region + LEFT JOIN _s1 AS _s1 + ON SUBSTRING(_s1.n_name, 1, 1) = SUBSTRING(region.r_name, 1, 1) + AND _s1.n_regionkey = region.r_regionkey + GROUP BY + 1, + SUBSTRING(_s1.n_name, 1, 1) ) SELECT - MAX(region.r_name) AS region_name, - COUNT(*) AS n_prefix_nations -FROM tpch.region AS region -LEFT JOIN _s1 AS _s1 - ON SUBSTRING(_s1.n_name, 1, 1) = SUBSTRING(region.r_name, 1, 1) - AND _s1.n_regionkey = region.r_regionkey -GROUP BY - _s1.n_regionkey, - SUBSTRING(_s1.n_name, 1, 1) + anything_r_name AS region_name, + n_rows * IIF(NOT n_regionkey IS NULL, 1, 0) AS n_prefix_nations +FROM _t0 ORDER BY 1 diff --git a/tests/test_sql_refsols/correl_2_sqlite.sql b/tests/test_sql_refsols/correl_2_sqlite.sql index 3ae3cfb6e..6fc203eb2 100644 --- a/tests/test_sql_refsols/correl_2_sqlite.sql +++ b/tests/test_sql_refsols/correl_2_sqlite.sql @@ -3,20 +3,26 @@ WITH 
_s3 AS ( c_comment, c_nationkey FROM tpch.customer +), _t0 AS ( + SELECT + _s3.c_nationkey, + MAX(nation.n_name) AS anything_n_name, + COUNT(*) AS n_rows + FROM tpch.region AS region + JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey + LEFT JOIN _s3 AS _s3 + ON LOWER(SUBSTRING(region.r_name, 1, 1)) = SUBSTRING(_s3.c_comment, 1, 1) + AND _s3.c_nationkey = nation.n_nationkey + WHERE + NOT region.r_name LIKE 'A%' + GROUP BY + 1, + SUBSTRING(_s3.c_comment, 1, 1) ) SELECT - MAX(nation.n_name) AS name, - COUNT(*) AS n_selected_custs -FROM tpch.region AS region -JOIN tpch.nation AS nation - ON nation.n_regionkey = region.r_regionkey -LEFT JOIN _s3 AS _s3 - ON LOWER(SUBSTRING(region.r_name, 1, 1)) = SUBSTRING(_s3.c_comment, 1, 1) - AND _s3.c_nationkey = nation.n_nationkey -WHERE - NOT region.r_name LIKE 'A%' -GROUP BY - _s3.c_nationkey, - SUBSTRING(_s3.c_comment, 1, 1) + anything_n_name AS name, + n_rows * IIF(NOT c_nationkey IS NULL, 1, 0) AS n_selected_custs +FROM _t0 ORDER BY 1 diff --git a/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql index ad9c80aeb..ab853bfb9 100644 --- a/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql @@ -75,32 +75,15 @@ WITH _s0 AS ( ON _s2.b_key = accounts.a_branchkey AND accounts.a_custkey = ( 42 - _s3.c_key ) -) -SELECT - _s7.b_key AS branch_key, - COUNT(DISTINCT 42 - _s7.c_key) AS n_local_cust, - COUNT(*) AS n_local_cust_local_acct -FROM _s0 AS _s0 -JOIN _s1 AS _s1 - ON SUBSTRING( - SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1), - CASE - WHEN ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 - ) < 1 - THEN 1 - ELSE ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 - ) - END, - CASE - WHEN ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || 
SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 - ) < 1 - THEN 0 - ELSE ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 - ) - CASE +), _t0 AS ( + SELECT + _s7.b_key, + COUNT(*) AS n_rows + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON SUBSTRING( + SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1), + CASE WHEN ( LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 ) < 1 @@ -108,27 +91,27 @@ JOIN _s1 AS _s1 ELSE ( LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 ) + END, + CASE + WHEN ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 + ) < 1 + THEN 0 + ELSE ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 + ) - CASE + WHEN ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 + ) < 1 + THEN 1 + ELSE ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 + ) + END END - END - ) = SUBSTRING( - _s0.b_addr, - CASE - WHEN ( - LENGTH(_s0.b_addr) + -7 - ) < 1 - THEN 1 - ELSE ( - LENGTH(_s0.b_addr) + -7 - ) - END, - CASE - WHEN ( - LENGTH(_s0.b_addr) + -5 - ) < 1 - THEN 0 - ELSE ( - LENGTH(_s0.b_addr) + -5 - ) - CASE + ) = SUBSTRING( + _s0.b_addr, + CASE WHEN ( LENGTH(_s0.b_addr) + -7 ) < 1 @@ -136,14 +119,45 @@ JOIN _s1 AS _s1 ELSE ( LENGTH(_s0.b_addr) + -7 ) + END, + CASE + WHEN ( + LENGTH(_s0.b_addr) + -5 + ) < 1 + THEN 0 + ELSE ( + LENGTH(_s0.b_addr) + -5 + ) - CASE + WHEN ( + LENGTH(_s0.b_addr) + -7 + ) < 1 + THEN 1 + ELSE ( + LENGTH(_s0.b_addr) + -7 + ) + END END - END - ) -LEFT JOIN _s7 AS _s7 - ON ( - 42 - _s1.c_key - ) = ( - 42 - _s7.c_key - ) AND _s0.b_key = _s7.b_key + ) + LEFT JOIN _s7 AS _s7 + ON ( + 42 - _s1.c_key + ) = ( + 42 - _s7.c_key + ) AND _s0.b_key = _s7.b_key + GROUP BY + 1, + ( + 42 - ( + _s7.c_key + ) + ) +) +SELECT + b_key AS 
branch_key, + COUNT(*) AS n_local_cust, + SUM(n_rows * IIF(NOT ( + b_key IS NULL + ), 1, 0)) AS n_local_cust_local_acct +FROM _t0 GROUP BY 1 diff --git a/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql index ad9c80aeb..ab853bfb9 100644 --- a/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql @@ -75,32 +75,15 @@ WITH _s0 AS ( ON _s2.b_key = accounts.a_branchkey AND accounts.a_custkey = ( 42 - _s3.c_key ) -) -SELECT - _s7.b_key AS branch_key, - COUNT(DISTINCT 42 - _s7.c_key) AS n_local_cust, - COUNT(*) AS n_local_cust_local_acct -FROM _s0 AS _s0 -JOIN _s1 AS _s1 - ON SUBSTRING( - SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1), - CASE - WHEN ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 - ) < 1 - THEN 1 - ELSE ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 - ) - END, - CASE - WHEN ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 - ) < 1 - THEN 0 - ELSE ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 - ) - CASE +), _t0 AS ( + SELECT + _s7.b_key, + COUNT(*) AS n_rows + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON SUBSTRING( + SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1), + CASE WHEN ( LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 ) < 1 @@ -108,27 +91,27 @@ JOIN _s1 AS _s1 ELSE ( LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 ) + END, + CASE + WHEN ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 + ) < 1 + THEN 0 + ELSE ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 + ) 
- CASE + WHEN ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 + ) < 1 + THEN 1 + ELSE ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 + ) + END END - END - ) = SUBSTRING( - _s0.b_addr, - CASE - WHEN ( - LENGTH(_s0.b_addr) + -7 - ) < 1 - THEN 1 - ELSE ( - LENGTH(_s0.b_addr) + -7 - ) - END, - CASE - WHEN ( - LENGTH(_s0.b_addr) + -5 - ) < 1 - THEN 0 - ELSE ( - LENGTH(_s0.b_addr) + -5 - ) - CASE + ) = SUBSTRING( + _s0.b_addr, + CASE WHEN ( LENGTH(_s0.b_addr) + -7 ) < 1 @@ -136,14 +119,45 @@ JOIN _s1 AS _s1 ELSE ( LENGTH(_s0.b_addr) + -7 ) + END, + CASE + WHEN ( + LENGTH(_s0.b_addr) + -5 + ) < 1 + THEN 0 + ELSE ( + LENGTH(_s0.b_addr) + -5 + ) - CASE + WHEN ( + LENGTH(_s0.b_addr) + -7 + ) < 1 + THEN 1 + ELSE ( + LENGTH(_s0.b_addr) + -7 + ) + END END - END - ) -LEFT JOIN _s7 AS _s7 - ON ( - 42 - _s1.c_key - ) = ( - 42 - _s7.c_key - ) AND _s0.b_key = _s7.b_key + ) + LEFT JOIN _s7 AS _s7 + ON ( + 42 - _s1.c_key + ) = ( + 42 - _s7.c_key + ) AND _s0.b_key = _s7.b_key + GROUP BY + 1, + ( + 42 - ( + _s7.c_key + ) + ) +) +SELECT + b_key AS branch_key, + COUNT(*) AS n_local_cust, + SUM(n_rows * IIF(NOT ( + b_key IS NULL + ), 1, 0)) AS n_local_cust_local_acct +FROM _t0 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_ansi.sql b/tests/test_sql_refsols/defog_broker_adv10_ansi.sql index 6496569fe..7b1d941c0 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_ansi.sql @@ -3,20 +3,26 @@ WITH _s1 AS ( sbtxcustid, sbtxdatetime FROM main.sbtransaction +), _t0 AS ( + SELECT + _s1.sbtxcustid, + ANY_VALUE(sbcustomer.sbcustname) AS anything_sbcustname, + COUNT(*) AS n_rows + FROM main.sbcustomer AS sbcustomer + LEFT JOIN _s1 AS _s1 + ON EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS DATETIME)) = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) + AND EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS DATETIME)) = 
EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) + AND _s1.sbtxcustid = sbcustomer.sbcustid + GROUP BY + EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS DATETIME)), + EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS DATETIME)), + 1 ) SELECT - _s1.sbtxcustid AS _id, - ANY_VALUE(sbcustomer.sbcustname) AS name, - COUNT(*) AS num_transactions -FROM main.sbcustomer AS sbcustomer -LEFT JOIN _s1 AS _s1 - ON EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS DATETIME)) = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) - AND EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS DATETIME)) = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) - AND _s1.sbtxcustid = sbcustomer.sbcustid -GROUP BY - EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS DATETIME)), - EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS DATETIME)), - 1 + sbtxcustid AS _id, + anything_sbcustname AS name, + n_rows * CASE WHEN NOT sbtxcustid IS NULL THEN 1 ELSE 0 END AS num_transactions +FROM _t0 ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_mysql.sql b/tests/test_sql_refsols/defog_broker_adv10_mysql.sql index 0913b4047..2d9dd038a 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_mysql.sql @@ -3,20 +3,26 @@ WITH _s1 AS ( sbtxcustid AS sbTxCustId, sbtxdatetime AS sbTxDateTime FROM main.sbTransaction +), _t0 AS ( + SELECT + _s1.sbTxCustId, + ANY_VALUE(sbCustomer.sbcustname) AS anything_sbCustName, + COUNT(*) AS n_rows + FROM main.sbCustomer AS sbCustomer + LEFT JOIN _s1 AS _s1 + ON EXTRACT(MONTH FROM CAST(_s1.sbTxDateTime AS DATETIME)) = EXTRACT(MONTH FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) + AND EXTRACT(YEAR FROM CAST(_s1.sbTxDateTime AS DATETIME)) = EXTRACT(YEAR FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) + AND _s1.sbTxCustId = sbCustomer.sbcustid + GROUP BY + EXTRACT(MONTH FROM CAST(_s1.sbTxDateTime AS DATETIME)), + EXTRACT(YEAR FROM CAST(_s1.sbTxDateTime AS DATETIME)), + 1 ) SELECT - 
_s1.sbTxCustId AS _id, - ANY_VALUE(sbCustomer.sbcustname) AS name, - COUNT(*) AS num_transactions -FROM main.sbCustomer AS sbCustomer -LEFT JOIN _s1 AS _s1 - ON EXTRACT(MONTH FROM CAST(_s1.sbTxDateTime AS DATETIME)) = EXTRACT(MONTH FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) - AND EXTRACT(YEAR FROM CAST(_s1.sbTxDateTime AS DATETIME)) = EXTRACT(YEAR FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) - AND _s1.sbTxCustId = sbCustomer.sbcustid -GROUP BY - EXTRACT(MONTH FROM CAST(_s1.sbTxDateTime AS DATETIME)), - EXTRACT(YEAR FROM CAST(_s1.sbTxDateTime AS DATETIME)), - 1 + sbTxCustId AS _id, + anything_sbCustName AS name, + n_rows * CASE WHEN NOT sbTxCustId IS NULL THEN 1 ELSE 0 END AS num_transactions +FROM _t0 ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_postgres.sql b/tests/test_sql_refsols/defog_broker_adv10_postgres.sql index 736fa480d..546ff5aee 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_postgres.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_postgres.sql @@ -3,20 +3,26 @@ WITH _s1 AS ( sbtxcustid, sbtxdatetime FROM main.sbtransaction +), _t0 AS ( + SELECT + _s1.sbtxcustid, + MAX(sbcustomer.sbcustname) AS anything_sbcustname, + COUNT(*) AS n_rows + FROM main.sbcustomer AS sbcustomer + LEFT JOIN _s1 AS _s1 + ON EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)) = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)) = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND _s1.sbtxcustid = sbcustomer.sbcustid + GROUP BY + EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)), + EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)), + 1 ) SELECT - _s1.sbtxcustid AS _id, - MAX(sbcustomer.sbcustname) AS name, - COUNT(*) AS num_transactions -FROM main.sbcustomer AS sbcustomer -LEFT JOIN _s1 AS _s1 - ON EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)) = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS 
TIMESTAMP)) - AND EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)) = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) - AND _s1.sbtxcustid = sbcustomer.sbcustid -GROUP BY - EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)), - EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)), - 1 + sbtxcustid AS _id, + anything_sbcustname AS name, + n_rows * CASE WHEN NOT sbtxcustid IS NULL THEN 1 ELSE 0 END AS num_transactions +FROM _t0 ORDER BY 3 DESC NULLS LAST LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql index e23896f7f..b03ae01b8 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql @@ -3,20 +3,26 @@ WITH _s1 AS ( sbtxcustid, sbtxdatetime FROM main.sbtransaction +), _t0 AS ( + SELECT + _s1.sbtxcustid, + ANY_VALUE(sbcustomer.sbcustname) AS anything_sbcustname, + COUNT(*) AS n_rows + FROM main.sbcustomer AS sbcustomer + LEFT JOIN _s1 AS _s1 + ON MONTH(CAST(_s1.sbtxdatetime AS TIMESTAMP)) = MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND YEAR(CAST(_s1.sbtxdatetime AS TIMESTAMP)) = YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND _s1.sbtxcustid = sbcustomer.sbcustid + GROUP BY + 1, + MONTH(CAST(_s1.sbtxdatetime AS TIMESTAMP)), + YEAR(CAST(_s1.sbtxdatetime AS TIMESTAMP)) ) SELECT - _s1.sbtxcustid AS _id, - ANY_VALUE(sbcustomer.sbcustname) AS name, - COUNT(*) AS num_transactions -FROM main.sbcustomer AS sbcustomer -LEFT JOIN _s1 AS _s1 - ON MONTH(CAST(_s1.sbtxdatetime AS TIMESTAMP)) = MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) - AND YEAR(CAST(_s1.sbtxdatetime AS TIMESTAMP)) = YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) - AND _s1.sbtxcustid = sbcustomer.sbcustid -GROUP BY - 1, - MONTH(CAST(_s1.sbtxdatetime AS TIMESTAMP)), - YEAR(CAST(_s1.sbtxdatetime AS TIMESTAMP)) + sbtxcustid AS _id, + anything_sbcustname AS name, + n_rows * IFF(NOT sbtxcustid IS NULL, 
1, 0) AS num_transactions +FROM _t0 ORDER BY 3 DESC NULLS LAST LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql index 0689c9fb0..54a7bab50 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql @@ -3,20 +3,26 @@ WITH _s1 AS ( sbtxcustid, sbtxdatetime FROM main.sbtransaction +), _t0 AS ( + SELECT + _s1.sbtxcustid, + MAX(sbcustomer.sbcustname) AS anything_sbcustname, + COUNT(*) AS n_rows + FROM main.sbcustomer AS sbcustomer + LEFT JOIN _s1 AS _s1 + ON CAST(STRFTIME('%Y', _s1.sbtxdatetime) AS INTEGER) = CAST(STRFTIME('%Y', sbcustomer.sbcustjoindate) AS INTEGER) + AND CAST(STRFTIME('%m', _s1.sbtxdatetime) AS INTEGER) = CAST(STRFTIME('%m', sbcustomer.sbcustjoindate) AS INTEGER) + AND _s1.sbtxcustid = sbcustomer.sbcustid + GROUP BY + CAST(STRFTIME('%Y', _s1.sbtxdatetime) AS INTEGER), + CAST(STRFTIME('%m', _s1.sbtxdatetime) AS INTEGER), + 1 ) SELECT - _s1.sbtxcustid AS _id, - MAX(sbcustomer.sbcustname) AS name, - COUNT(*) AS num_transactions -FROM main.sbcustomer AS sbcustomer -LEFT JOIN _s1 AS _s1 - ON CAST(STRFTIME('%Y', _s1.sbtxdatetime) AS INTEGER) = CAST(STRFTIME('%Y', sbcustomer.sbcustjoindate) AS INTEGER) - AND CAST(STRFTIME('%m', _s1.sbtxdatetime) AS INTEGER) = CAST(STRFTIME('%m', sbcustomer.sbcustjoindate) AS INTEGER) - AND _s1.sbtxcustid = sbcustomer.sbcustid -GROUP BY - CAST(STRFTIME('%Y', _s1.sbtxdatetime) AS INTEGER), - CAST(STRFTIME('%m', _s1.sbtxdatetime) AS INTEGER), - 1 + sbtxcustid AS _id, + anything_sbcustname AS name, + n_rows * IIF(NOT sbtxcustid IS NULL, 1, 0) AS num_transactions +FROM _t0 ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql index e09347225..ca99bf581 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql @@ -2,15 +2,22 @@ 
WITH _s1 AS ( SELECT car_id FROM main.sales +), _t0 AS ( + SELECT + _s1.car_id, + ANY_VALUE(cars.make) AS anything_make, + ANY_VALUE(cars.model) AS anything_model, + COUNT(*) AS n_rows + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.vin_number) LIKE '%m5%' + GROUP BY + 1 ) SELECT - ANY_VALUE(cars.make) AS make, - ANY_VALUE(cars.model) AS model, - COUNT(*) AS num_sales -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.vin_number) LIKE '%m5%' -GROUP BY - _s1.car_id + anything_make AS make, + anything_model AS model, + n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales +FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql index e09347225..ca99bf581 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql @@ -2,15 +2,22 @@ WITH _s1 AS ( SELECT car_id FROM main.sales +), _t0 AS ( + SELECT + _s1.car_id, + ANY_VALUE(cars.make) AS anything_make, + ANY_VALUE(cars.model) AS anything_model, + COUNT(*) AS n_rows + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.vin_number) LIKE '%m5%' + GROUP BY + 1 ) SELECT - ANY_VALUE(cars.make) AS make, - ANY_VALUE(cars.model) AS model, - COUNT(*) AS num_sales -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.vin_number) LIKE '%m5%' -GROUP BY - _s1.car_id + anything_make AS make, + anything_model AS model, + n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales +FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql index 60354e100..d43b5dd36 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql @@ -2,15 +2,22 @@ WITH 
_s1 AS ( SELECT car_id FROM main.sales +), _t0 AS ( + SELECT + _s1.car_id, + MAX(cars.make) AS anything_make, + MAX(cars.model) AS anything_model, + COUNT(*) AS n_rows + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.vin_number) LIKE '%m5%' + GROUP BY + 1 ) SELECT - MAX(cars.make) AS make, - MAX(cars.model) AS model, - COUNT(*) AS num_sales -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.vin_number) LIKE '%m5%' -GROUP BY - _s1.car_id + anything_make AS make, + anything_model AS model, + n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales +FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql index 3d7738824..1246f42ad 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql @@ -2,15 +2,22 @@ WITH _s1 AS ( SELECT car_id FROM main.sales +), _t0 AS ( + SELECT + _s1.car_id, + ANY_VALUE(cars.make) AS anything_make, + ANY_VALUE(cars.model) AS anything_model, + COUNT(*) AS n_rows + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + CONTAINS(LOWER(cars.vin_number), 'm5') + GROUP BY + 1 ) SELECT - ANY_VALUE(cars.make) AS make, - ANY_VALUE(cars.model) AS model, - COUNT(*) AS num_sales -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - CONTAINS(LOWER(cars.vin_number), 'm5') -GROUP BY - _s1.car_id + anything_make AS make, + anything_model AS model, + n_rows * IFF(NOT car_id IS NULL, 1, 0) AS num_sales +FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql index 60354e100..59fdcf024 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql @@ -2,15 +2,22 @@ WITH _s1 AS ( SELECT car_id FROM 
main.sales +), _t0 AS ( + SELECT + _s1.car_id, + MAX(cars.make) AS anything_make, + MAX(cars.model) AS anything_model, + COUNT(*) AS n_rows + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.vin_number) LIKE '%m5%' + GROUP BY + 1 ) SELECT - MAX(cars.make) AS make, - MAX(cars.model) AS model, - COUNT(*) AS num_sales -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.vin_number) LIKE '%m5%' -GROUP BY - _s1.car_id + anything_make AS make, + anything_model AS model, + n_rows * IIF(NOT car_id IS NULL, 1, 0) AS num_sales +FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql index 030fc8286..b4fdda127 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql @@ -5,14 +5,26 @@ WITH _s1 AS ( FROM main.sales WHERE sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), 30, DAY) +), _t0 AS ( + SELECT + _s1.car_id, + COUNT(*) AS n_rows, + SUM(_s1.sale_price) AS sum_sale_price + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.make) LIKE '%toyota%' + GROUP BY + 1 ) SELECT - COUNT(*) AS num_sales, - COALESCE(SUM(_s1.sale_price), 0) AS total_revenue -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.make) LIKE '%toyota%' -GROUP BY - _s1.car_id + n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales, + CASE + WHEN ( + n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END + ) > 0 + THEN COALESCE(sum_sale_price, 0) + ELSE NULL + END AS total_revenue +FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql index cb476ee00..d1690b727 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql @@ -5,14 +5,26 @@ WITH _s1 
AS ( FROM main.sales WHERE sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '30' DAY) +), _t0 AS ( + SELECT + _s1.car_id, + COUNT(*) AS n_rows, + SUM(_s1.sale_price) AS sum_sale_price + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.make) LIKE '%toyota%' + GROUP BY + 1 ) SELECT - COUNT(*) AS num_sales, - COALESCE(SUM(_s1.sale_price), 0) AS total_revenue -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.make) LIKE '%toyota%' -GROUP BY - _s1.car_id + n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales, + CASE + WHEN ( + n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END + ) > 0 + THEN COALESCE(sum_sale_price, 0) + ELSE NULL + END AS total_revenue +FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql index 0fd50d265..1c1767fa3 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql @@ -5,14 +5,26 @@ WITH _s1 AS ( FROM main.sales WHERE sale_date >= CURRENT_TIMESTAMP - INTERVAL '30 DAY' +), _t0 AS ( + SELECT + _s1.car_id, + COUNT(*) AS n_rows, + SUM(_s1.sale_price) AS sum_sale_price + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.make) LIKE '%toyota%' + GROUP BY + 1 ) SELECT - COUNT(*) AS num_sales, - COALESCE(SUM(_s1.sale_price), 0) AS total_revenue -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.make) LIKE '%toyota%' -GROUP BY - _s1.car_id + n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales, + CASE + WHEN ( + n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END + ) > 0 + THEN COALESCE(sum_sale_price, 0) + ELSE NULL + END AS total_revenue +FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql 
index 2b5ff888f..792fa3181 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql @@ -5,14 +5,26 @@ WITH _s1 AS ( FROM main.sales WHERE sale_date >= DATEADD(DAY, -30, CURRENT_TIMESTAMP()) +), _t0 AS ( + SELECT + _s1.car_id, + COUNT(*) AS n_rows, + SUM(_s1.sale_price) AS sum_sale_price + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + CONTAINS(LOWER(cars.make), 'toyota') + GROUP BY + 1 ) SELECT - COUNT(*) AS num_sales, - COALESCE(SUM(_s1.sale_price), 0) AS total_revenue -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - CONTAINS(LOWER(cars.make), 'toyota') -GROUP BY - _s1.car_id + n_rows * IFF(NOT car_id IS NULL, 1, 0) AS num_sales, + CASE + WHEN ( + n_rows * IFF(NOT car_id IS NULL, 1, 0) + ) > 0 + THEN COALESCE(sum_sale_price, 0) + ELSE NULL + END AS total_revenue +FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql index d54948699..47d615e83 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql @@ -5,14 +5,26 @@ WITH _s1 AS ( FROM main.sales WHERE sale_date >= DATETIME('now', '-30 day') +), _t0 AS ( + SELECT + _s1.car_id, + COUNT(*) AS n_rows, + SUM(_s1.sale_price) AS sum_sale_price + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.make) LIKE '%toyota%' + GROUP BY + 1 ) SELECT - COUNT(*) AS num_sales, - COALESCE(SUM(_s1.sale_price), 0) AS total_revenue -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.make) LIKE '%toyota%' -GROUP BY - _s1.car_id + n_rows * IIF(NOT car_id IS NULL, 1, 0) AS num_sales, + CASE + WHEN ( + n_rows * IIF(NOT car_id IS NULL, 1, 0) + ) > 0 + THEN COALESCE(sum_sale_price, 0) + ELSE NULL + END AS total_revenue +FROM _t0 diff --git 
a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 33f6b683a..98f352c3f 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -6,16 +6,23 @@ WITH _s1 AS ( WHERE created_at >= DATE_TRUNC('DAY', DATE_SUB(CURRENT_TIMESTAMP(), 150, DAY)) AND receiver_type = 1 +), _t0 AS ( + SELECT + _s1.receiver_id, + ANY_VALUE(merchants.name) AS anything_name, + COUNT(*) AS n_rows, + SUM(_s1.amount) AS sum_amount + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid + GROUP BY + 1 ) SELECT - ANY_VALUE(merchants.name) AS merchant_name, - COUNT(*) AS total_transactions, - COALESCE(SUM(_s1.amount), 0) AS total_amount -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid -GROUP BY - _s1.receiver_id + anything_name AS merchant_name, + n_rows * CASE WHEN NOT receiver_id IS NULL THEN 1 ELSE 0 END AS total_transactions, + COALESCE(sum_amount, 0) AS total_amount +FROM _t0 ORDER BY 3 DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql index e2b281855..9f2637179 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql @@ -6,16 +6,23 @@ WITH _s1 AS ( WHERE created_at >= CAST(DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '150' DAY) AS DATE) AND receiver_type = 1 +), _t0 AS ( + SELECT + _s1.receiver_id, + ANY_VALUE(merchants.name) AS anything_name, + COUNT(*) AS n_rows, + SUM(_s1.amount) AS sum_amount + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid + GROUP BY + 1 ) SELECT - ANY_VALUE(merchants.name) AS merchant_name, - COUNT(*) AS total_transactions, - COALESCE(SUM(_s1.amount), 0) AS total_amount -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = 
merchants.mid -GROUP BY - _s1.receiver_id + anything_name AS merchant_name, + n_rows * CASE WHEN NOT receiver_id IS NULL THEN 1 ELSE 0 END AS total_transactions, + COALESCE(sum_amount, 0) AS total_amount +FROM _t0 ORDER BY 3 DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql index 4cd68e90b..1aa23732c 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql @@ -6,16 +6,23 @@ WITH _s1 AS ( WHERE created_at >= DATE_TRUNC('DAY', CURRENT_TIMESTAMP - INTERVAL '150 DAY') AND receiver_type = 1 +), _t0 AS ( + SELECT + _s1.receiver_id, + MAX(merchants.name) AS anything_name, + COUNT(*) AS n_rows, + SUM(_s1.amount) AS sum_amount + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid + GROUP BY + 1 ) SELECT - MAX(merchants.name) AS merchant_name, - COUNT(*) AS total_transactions, - COALESCE(SUM(_s1.amount), 0) AS total_amount -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid -GROUP BY - _s1.receiver_id + anything_name AS merchant_name, + n_rows * CASE WHEN NOT receiver_id IS NULL THEN 1 ELSE 0 END AS total_transactions, + COALESCE(sum_amount, 0) AS total_amount +FROM _t0 ORDER BY 3 DESC NULLS LAST LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql index 4a1f050cb..15a78ae5e 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql @@ -6,16 +6,23 @@ WITH _s1 AS ( WHERE created_at >= DATE_TRUNC('DAY', DATEADD(DAY, -150, CURRENT_TIMESTAMP())) AND receiver_type = 1 +), _t0 AS ( + SELECT + _s1.receiver_id, + ANY_VALUE(merchants.name) AS anything_name, + COUNT(*) AS n_rows, + SUM(_s1.amount) AS sum_amount + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON 
_s1.receiver_id = merchants.mid + GROUP BY + 1 ) SELECT - ANY_VALUE(merchants.name) AS merchant_name, - COUNT(*) AS total_transactions, - COALESCE(SUM(_s1.amount), 0) AS total_amount -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid -GROUP BY - _s1.receiver_id + anything_name AS merchant_name, + n_rows * IFF(NOT receiver_id IS NULL, 1, 0) AS total_transactions, + COALESCE(sum_amount, 0) AS total_amount +FROM _t0 ORDER BY 3 DESC NULLS LAST LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index 2b2ccd335..22e5635b6 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -6,16 +6,23 @@ WITH _s1 AS ( WHERE created_at >= DATE(DATETIME('now', '-150 day'), 'start of day') AND receiver_type = 1 +), _t0 AS ( + SELECT + _s1.receiver_id, + MAX(merchants.name) AS anything_name, + COUNT(*) AS n_rows, + SUM(_s1.amount) AS sum_amount + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid + GROUP BY + 1 ) SELECT - MAX(merchants.name) AS merchant_name, - COUNT(*) AS total_transactions, - COALESCE(SUM(_s1.amount), 0) AS total_amount -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid -GROUP BY - _s1.receiver_id + anything_name AS merchant_name, + n_rows * IIF(NOT receiver_id IS NULL, 1, 0) AS total_transactions, + COALESCE(sum_amount, 0) AS total_amount +FROM _t0 ORDER BY 3 DESC LIMIT 2 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql index fb43a9449..a15437263 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql @@ -29,9 +29,10 @@ WITH _s0 AS ( OR _s7.s_month3 = EXTRACT(MONTH FROM CAST(_s5.ev_dt AS DATETIME)) ) ), _t1 AS ( - 
SELECT DISTINCT + SELECT _s9.s_name, - _s9.search_id + _s9.search_id, + COUNT(*) AS n_rows FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(searches.search_ts AS DATETIME)) @@ -39,11 +40,16 @@ WITH _s0 AS ( OR _s0.s_month3 = EXTRACT(MONTH FROM CAST(searches.search_ts AS DATETIME)) LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id + GROUP BY + 1, + 2 ), _s16 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(TRUE) AS sum_is_intra_season + SUM(( + n_rows * CASE WHEN NOT search_id IS NULL THEN 1 ELSE 0 END + ) > 0) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql index 4956e8c5f..21b79af08 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql @@ -29,9 +29,10 @@ WITH _s0 AS ( OR _s7.s_month3 = EXTRACT(MONTH FROM CAST(_s5.ev_dt AS DATETIME)) ) ), _t1 AS ( - SELECT DISTINCT + SELECT _s9.s_name, - _s9.search_id + _s9.search_id, + COUNT(*) AS n_rows FROM _s0 AS _s0 JOIN SEARCHES AS SEARCHES ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(SEARCHES.search_ts AS DATETIME)) @@ -39,11 +40,16 @@ WITH _s0 AS ( OR _s0.s_month3 = EXTRACT(MONTH FROM CAST(SEARCHES.search_ts AS DATETIME)) LEFT JOIN _s9 AS _s9 ON SEARCHES.search_id = _s9.search_id AND _s0.s_name = _s9.s_name + GROUP BY + 1, + 2 ), _s16 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(TRUE) AS sum_is_intra_season + SUM(( + n_rows * CASE WHEN NOT search_id IS NULL THEN 1 ELSE 0 END + ) > 0) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql index 1e46c2f36..2dd1ee819 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql @@ 
-29,9 +29,10 @@ WITH _s0 AS ( OR _s7.s_month3 = EXTRACT(MONTH FROM CAST(_s5.ev_dt AS TIMESTAMP)) ) ), _t1 AS ( - SELECT DISTINCT + SELECT _s9.s_name, - _s9.search_id + _s9.search_id, + COUNT(*) AS n_rows FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(searches.search_ts AS TIMESTAMP)) @@ -39,11 +40,22 @@ WITH _s0 AS ( OR _s0.s_month3 = EXTRACT(MONTH FROM CAST(searches.search_ts AS TIMESTAMP)) LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id + GROUP BY + 1, + 2 ), _s16 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(1) AS sum_is_intra_season + SUM( + CASE + WHEN ( + n_rows * CASE WHEN NOT search_id IS NULL THEN 1 ELSE 0 END + ) > 0 + THEN 1 + ELSE 0 + END + ) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql index b1ca8ede5..56452f25d 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql @@ -29,9 +29,10 @@ WITH _s0 AS ( OR _s7.s_month3 = MONTH(CAST(_s5.ev_dt AS TIMESTAMP)) ) ), _t1 AS ( - SELECT DISTINCT + SELECT _s9.s_name, - _s9.search_id + _s9.search_id, + COUNT(*) AS n_rows FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) @@ -39,11 +40,16 @@ WITH _s0 AS ( OR _s0.s_month3 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id + GROUP BY + 1, + 2 ), _s16 AS ( SELECT s_name, COUNT(*) AS n_rows, - COUNT_IF(TRUE) AS sum_is_intra_season + COUNT_IF(( + n_rows * IFF(NOT search_id IS NULL, 1, 0) + ) > 0) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql index 5f8b57232..ac6901882 100644 --- 
a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql @@ -31,9 +31,10 @@ WITH _s0 AS ( OR _s7.s_month3 = CAST(STRFTIME('%m', _s5.ev_dt) AS INTEGER) ) ), _t1 AS ( - SELECT DISTINCT + SELECT _s9.s_name, - _s9.search_id + _s9.search_id, + COUNT(*) AS n_rows FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = CAST(STRFTIME('%m', searches.search_ts) AS INTEGER) @@ -41,11 +42,16 @@ WITH _s0 AS ( OR _s0.s_month3 = CAST(STRFTIME('%m', searches.search_ts) AS INTEGER) LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id + GROUP BY + 1, + 2 ), _s16 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(TRUE) AS sum_is_intra_season + SUM(( + n_rows * IIF(NOT search_id IS NULL, 1, 0) + ) > 0) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql index 4249f19e3..8783042be 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql @@ -13,6 +13,7 @@ WITH _s0 AS ( FROM main.incidents ), _t1 AS ( SELECT + _s7.in_device_id, ANY_VALUE(_s3.co_id) AS anything__id_3, ANY_VALUE(_s2.co_id) AS anything_co_id, COUNT(*) AS n_rows @@ -24,13 +25,13 @@ WITH _s0 AS ( LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id GROUP BY - _s7.in_device_id + 1 ), _s9 AS ( SELECT anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql index d06dcc150..43b4ef778 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql +++ 
b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql @@ -13,6 +13,7 @@ WITH _s0 AS ( FROM main.INCIDENTS ), _t1 AS ( SELECT + _s7.in_device_id, ANY_VALUE(_s3.co_id) AS anything__id_3, ANY_VALUE(_s2.co_id) AS anything_co_id, COUNT(*) AS n_rows @@ -24,13 +25,13 @@ WITH _s0 AS ( LEFT JOIN _s7 AS _s7 ON DEVICES.de_id = _s7.in_device_id GROUP BY - _s7.in_device_id + 1 ), _s9 AS ( SELECT anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql index 903dbeb65..1b932331b 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql @@ -13,6 +13,7 @@ WITH _s0 AS ( FROM main.incidents ), _t1 AS ( SELECT + _s7.in_device_id, MAX(_s3.co_id) AS anything__id_3, MAX(_s2.co_id) AS anything_co_id, COUNT(*) AS n_rows @@ -24,13 +25,13 @@ WITH _s0 AS ( LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id GROUP BY - _s7.in_device_id + 1 ), _s9 AS ( SELECT anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql index 26924e69f..15dcb5047 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql @@ -13,6 +13,7 @@ WITH _s0 AS ( FROM main.incidents ), _t1 AS ( SELECT + _s7.in_device_id, ANY_VALUE(_s3.co_id) AS anything__id_3, ANY_VALUE(_s2.co_id) AS 
anything_co_id, COUNT(*) AS n_rows @@ -24,13 +25,13 @@ WITH _s0 AS ( LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id GROUP BY - _s7.in_device_id + 1 ), _s9 AS ( SELECT anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * IFF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql index ea40864d6..dbcaf6a68 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql @@ -13,6 +13,7 @@ WITH _s0 AS ( FROM main.incidents ), _t1 AS ( SELECT + _s7.in_device_id, MAX(_s3.co_id) AS anything__id_3, MAX(_s2.co_id) AS anything_co_id, COUNT(*) AS n_rows @@ -24,13 +25,13 @@ WITH _s0 AS ( LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id GROUP BY - _s7.in_device_id + 1 ), _s9 AS ( SELECT anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * IIF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql index 331ebd37d..d54610223 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql @@ -38,6 +38,7 @@ WITH _t2 AS ( 1 ), _t5 AS ( SELECT + _s11.in_device_id, ANY_VALUE(users.us_country_id) AS anything_us_country_id, COUNT(*) AS n_rows FROM main.users AS users @@ -46,12 +47,12 @@ WITH _t2 AS ( LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id GROUP BY - _s11.in_device_id + 1 ), _s13 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * CASE WHEN NOT in_device_id IS NULL 
THEN 1 ELSE 0 END) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql index 65d1dcdda..e192773d4 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql @@ -38,6 +38,7 @@ WITH _t2 AS ( 1 ), _t5 AS ( SELECT + _s11.in_device_id, ANY_VALUE(USERS.us_country_id) AS anything_us_country_id, COUNT(*) AS n_rows FROM main.USERS AS USERS @@ -46,12 +47,12 @@ WITH _t2 AS ( LEFT JOIN _t2 AS _s11 ON DEVICES.de_id = _s11.in_device_id GROUP BY - _s11.in_device_id + 1 ), _s13 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql index 01b74e04e..f415d53cf 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql @@ -38,6 +38,7 @@ WITH _t2 AS ( 1 ), _t5 AS ( SELECT + _s11.in_device_id, MAX(users.us_country_id) AS anything_us_country_id, COUNT(*) AS n_rows FROM main.users AS users @@ -46,12 +47,12 @@ WITH _t2 AS ( LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id GROUP BY - _s11.in_device_id + 1 ), _s13 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql index c51990a88..4703c4aaf 100644 --- 
a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql @@ -38,6 +38,7 @@ WITH _t2 AS ( 1 ), _t5 AS ( SELECT + _s11.in_device_id, ANY_VALUE(users.us_country_id) AS anything_us_country_id, COUNT(*) AS n_rows FROM main.users AS users @@ -46,12 +47,12 @@ WITH _t2 AS ( LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id GROUP BY - _s11.in_device_id + 1 ), _s13 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * IFF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql index 117f8a414..747b8a865 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql @@ -38,6 +38,7 @@ WITH _t2 AS ( 1 ), _t5 AS ( SELECT + _s11.in_device_id, MAX(users.us_country_id) AS anything_us_country_id, COUNT(*) AS n_rows FROM main.users AS users @@ -46,12 +47,12 @@ WITH _t2 AS ( LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id GROUP BY - _s11.in_device_id + 1 ), _s13 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * IIF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql index 7d3d9d3fb..ec68d668a 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql @@ -4,6 +4,7 @@ WITH _s3 AS ( FROM main.incidents ), _t1 AS ( SELECT + _s3.in_device_id, 
ANY_VALUE(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(*) AS n_rows FROM main.devices AS devices @@ -12,19 +13,19 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON _s3.in_device_id = devices.de_id GROUP BY - _s3.in_device_id + 1 ), _s5 AS ( SELECT anything_de_production_country_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_incidents FROM _t1 GROUP BY 1 ) SELECT countries.co_name AS country, - ROUND(COALESCE(_s5.sum_n_rows, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 ON _s5.anything_de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql index 2eb2686d3..2f71be8e8 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql @@ -4,6 +4,7 @@ WITH _s3 AS ( FROM main.INCIDENTS ), _t1 AS ( SELECT + _s3.in_device_id, ANY_VALUE(DEVICES.de_production_country_id) AS anything_de_production_country_id, COUNT(*) AS n_rows FROM main.DEVICES AS DEVICES @@ -12,19 +13,19 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON DEVICES.de_id = _s3.in_device_id GROUP BY - _s3.in_device_id + 1 ), _s5 AS ( SELECT anything_de_production_country_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_incidents FROM _t1 GROUP BY 1 ) SELECT COUNTRIES.co_name COLLATE utf8mb4_bin AS country, - ROUND(COALESCE(_s5.sum_n_rows, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.COUNTRIES AS COUNTRIES LEFT JOIN _s5 AS _s5 ON COUNTRIES.co_id = 
_s5.anything_de_production_country_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql index 72f3de996..9750d9a8b 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql @@ -4,6 +4,7 @@ WITH _s3 AS ( FROM main.incidents ), _t1 AS ( SELECT + _s3.in_device_id, MAX(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(*) AS n_rows FROM main.devices AS devices @@ -12,12 +13,12 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON _s3.in_device_id = devices.de_id GROUP BY - _s3.in_device_id + 1 ), _s5 AS ( SELECT anything_de_production_country_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_incidents FROM _t1 GROUP BY 1 @@ -25,7 +26,7 @@ WITH _s3 AS ( SELECT countries.co_name AS country, ROUND( - CAST(CAST(COALESCE(_s5.sum_n_rows, 0) AS DOUBLE PRECISION) / COALESCE(_s5.n_rows, 0) AS DECIMAL), + CAST(CAST(COALESCE(_s5.sum_n_incidents, 0) AS DOUBLE PRECISION) / COALESCE(_s5.n_rows, 0) AS DECIMAL), 2 ) AS ir FROM main.countries AS countries diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql index dd7dddaeb..360f65fa6 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql @@ -4,6 +4,7 @@ WITH _s3 AS ( FROM main.incidents ), _t1 AS ( SELECT + _s3.in_device_id, ANY_VALUE(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(*) AS n_rows FROM main.devices AS devices @@ -12,19 +13,19 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON 
_s3.in_device_id = devices.de_id GROUP BY - _s3.in_device_id + 1 ), _s5 AS ( SELECT anything_de_production_country_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * IFF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_incidents FROM _t1 GROUP BY 1 ) SELECT countries.co_name AS country, - ROUND(COALESCE(_s5.sum_n_rows, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 ON _s5.anything_de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql index a5f88e272..b49ac1207 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql @@ -4,6 +4,7 @@ WITH _s3 AS ( FROM main.incidents ), _t1 AS ( SELECT + _s3.in_device_id, MAX(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(*) AS n_rows FROM main.devices AS devices @@ -12,19 +13,19 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON _s3.in_device_id = devices.de_id GROUP BY - _s3.in_device_id + 1 ), _s5 AS ( SELECT anything_de_production_country_id, COUNT(*) AS n_rows, - SUM(n_rows) AS sum_n_rows + SUM(n_rows * IIF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_incidents FROM _t1 GROUP BY 1 ) SELECT countries.co_name AS country, - ROUND(CAST(COALESCE(_s5.sum_n_rows, 0) AS REAL) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 ON _s5.anything_de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index f139a043c..38b0e3302 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ 
b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -46,17 +46,23 @@ WITH _t5 AS ( AND _s11.o_orderkey = _t3.o_orderkey WHERE _t3.anything_o_orderstatus = 'F' +), _t0 AS ( + SELECT + _s13.anything_l_suppkey, + ANY_VALUE(supplier.s_name) AS anything_s_name, + COUNT(*) AS n_rows + FROM tpch.supplier AS supplier + JOIN tpch.nation AS nation + ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey + LEFT JOIN _s13 AS _s13 + ON _s13.anything_l_suppkey = supplier.s_suppkey + GROUP BY + 1 ) SELECT - ANY_VALUE(supplier.s_name) AS S_NAME, - COUNT(*) AS NUMWAIT -FROM tpch.supplier AS supplier -JOIN tpch.nation AS nation - ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey -GROUP BY - _s13.anything_l_suppkey + anything_s_name AS S_NAME, + n_rows * CASE WHEN NOT anything_l_suppkey IS NULL THEN 1 ELSE 0 END AS NUMWAIT +FROM _t0 ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/tpch_q21_mysql.sql b/tests/test_sql_refsols/tpch_q21_mysql.sql index bb319b113..f98980c93 100644 --- a/tests/test_sql_refsols/tpch_q21_mysql.sql +++ b/tests/test_sql_refsols/tpch_q21_mysql.sql @@ -50,17 +50,23 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL +), _t0 AS ( + SELECT + _s13.anything_l_suppkey, + ANY_VALUE(SUPPLIER.s_name) AS anything_s_name, + COUNT(*) AS n_rows + FROM tpch.SUPPLIER AS SUPPLIER + JOIN tpch.NATION AS NATION + ON NATION.n_name = 'SAUDI ARABIA' AND NATION.n_nationkey = SUPPLIER.s_nationkey + LEFT JOIN _s13 AS _s13 + ON SUPPLIER.s_suppkey = _s13.anything_l_suppkey + GROUP BY + 1 ) SELECT - ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, - COUNT(*) AS NUMWAIT -FROM tpch.SUPPLIER AS SUPPLIER -JOIN tpch.NATION AS NATION - ON NATION.n_name = 'SAUDI ARABIA' AND NATION.n_nationkey = SUPPLIER.s_nationkey -LEFT JOIN _s13 AS _s13 - ON SUPPLIER.s_suppkey = _s13.anything_l_suppkey -GROUP BY - 
_s13.anything_l_suppkey + anything_s_name COLLATE utf8mb4_bin AS S_NAME, + n_rows * CASE WHEN NOT anything_l_suppkey IS NULL THEN 1 ELSE 0 END AS NUMWAIT +FROM _t0 ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/tpch_q21_postgres.sql b/tests/test_sql_refsols/tpch_q21_postgres.sql index 54e706b70..272f6509f 100644 --- a/tests/test_sql_refsols/tpch_q21_postgres.sql +++ b/tests/test_sql_refsols/tpch_q21_postgres.sql @@ -50,17 +50,23 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL +), _t0 AS ( + SELECT + _s13.anything_l_suppkey, + MAX(supplier.s_name) AS anything_s_name, + COUNT(*) AS n_rows + FROM tpch.supplier AS supplier + JOIN tpch.nation AS nation + ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey + LEFT JOIN _s13 AS _s13 + ON _s13.anything_l_suppkey = supplier.s_suppkey + GROUP BY + 1 ) SELECT - MAX(supplier.s_name) AS S_NAME, - COUNT(*) AS NUMWAIT -FROM tpch.supplier AS supplier -JOIN tpch.nation AS nation - ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey -GROUP BY - _s13.anything_l_suppkey + anything_s_name AS S_NAME, + n_rows * CASE WHEN NOT anything_l_suppkey IS NULL THEN 1 ELSE 0 END AS NUMWAIT +FROM _t0 ORDER BY 2 DESC NULLS LAST, 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q21_snowflake.sql b/tests/test_sql_refsols/tpch_q21_snowflake.sql index 7b4e2eed9..7a0653c71 100644 --- a/tests/test_sql_refsols/tpch_q21_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q21_snowflake.sql @@ -50,17 +50,23 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL +), _t0 AS ( + SELECT + _s13.anything_l_suppkey, + ANY_VALUE(supplier.s_name) AS anything_s_name, + COUNT(*) AS n_rows + FROM tpch.supplier AS supplier + JOIN tpch.nation AS nation + ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = 
supplier.s_nationkey + LEFT JOIN _s13 AS _s13 + ON _s13.anything_l_suppkey = supplier.s_suppkey + GROUP BY + 1 ) SELECT - ANY_VALUE(supplier.s_name) AS S_NAME, - COUNT(*) AS NUMWAIT -FROM tpch.supplier AS supplier -JOIN tpch.nation AS nation - ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey -GROUP BY - _s13.anything_l_suppkey + anything_s_name AS S_NAME, + n_rows * IFF(NOT anything_l_suppkey IS NULL, 1, 0) AS NUMWAIT +FROM _t0 ORDER BY 2 DESC NULLS LAST, 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q21_sqlite.sql b/tests/test_sql_refsols/tpch_q21_sqlite.sql index c8aea7555..813e8788d 100644 --- a/tests/test_sql_refsols/tpch_q21_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q21_sqlite.sql @@ -50,17 +50,23 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL +), _t0 AS ( + SELECT + _s13.anything_l_suppkey, + MAX(supplier.s_name) AS anything_s_name, + COUNT(*) AS n_rows + FROM tpch.supplier AS supplier + JOIN tpch.nation AS nation + ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey + LEFT JOIN _s13 AS _s13 + ON _s13.anything_l_suppkey = supplier.s_suppkey + GROUP BY + 1 ) SELECT - MAX(supplier.s_name) AS S_NAME, - COUNT(*) AS NUMWAIT -FROM tpch.supplier AS supplier -JOIN tpch.nation AS nation - ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey -GROUP BY - _s13.anything_l_suppkey + anything_s_name AS S_NAME, + n_rows * IIF(NOT anything_l_suppkey IS NULL, 1, 0) AS NUMWAIT +FROM _t0 ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/tpch_q22_ansi.sql b/tests/test_sql_refsols/tpch_q22_ansi.sql index 66a4bd6df..a1663060c 100644 --- a/tests/test_sql_refsols/tpch_q22_ansi.sql +++ b/tests/test_sql_refsols/tpch_q22_ansi.sql @@ -11,6 +11,7 @@ WITH _s0 AS ( FROM tpch.orders 
), _t2 AS ( SELECT + _s3.o_custkey, ANY_VALUE(customer.c_acctbal) AS anything_c_acctbal, ANY_VALUE(customer.c_phone) AS anything_c_phone, COUNT(*) AS n_rows @@ -21,7 +22,7 @@ WITH _s0 AS ( LEFT JOIN _s3 AS _s3 ON _s3.o_custkey = customer.c_custkey GROUP BY - _s3.o_custkey + 1 ) SELECT SUBSTRING(anything_c_phone, 1, 2) AS CNTRY_CODE, @@ -29,7 +30,9 @@ SELECT COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL FROM _t2 WHERE - n_rows = 0 + ( + n_rows * CASE WHEN NOT o_custkey IS NULL THEN 1 ELSE 0 END + ) = 0 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_mysql.sql b/tests/test_sql_refsols/tpch_q22_mysql.sql index bcba0ba2f..12c094ea4 100644 --- a/tests/test_sql_refsols/tpch_q22_mysql.sql +++ b/tests/test_sql_refsols/tpch_q22_mysql.sql @@ -11,6 +11,7 @@ WITH _s0 AS ( FROM tpch.ORDERS ), _t2 AS ( SELECT + _s3.o_custkey, ANY_VALUE(CUSTOMER.c_acctbal) AS anything_c_acctbal, ANY_VALUE(CUSTOMER.c_phone) AS anything_c_phone, COUNT(*) AS n_rows @@ -21,7 +22,7 @@ WITH _s0 AS ( LEFT JOIN _s3 AS _s3 ON CUSTOMER.c_custkey = _s3.o_custkey GROUP BY - _s3.o_custkey + 1 ) SELECT SUBSTRING(anything_c_phone, 1, 2) COLLATE utf8mb4_bin AS CNTRY_CODE, @@ -29,7 +30,9 @@ SELECT COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL FROM _t2 WHERE - n_rows = 0 + ( + n_rows * CASE WHEN NOT o_custkey IS NULL THEN 1 ELSE 0 END + ) = 0 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_postgres.sql b/tests/test_sql_refsols/tpch_q22_postgres.sql index eb1c7ae9c..4e78bfbaf 100644 --- a/tests/test_sql_refsols/tpch_q22_postgres.sql +++ b/tests/test_sql_refsols/tpch_q22_postgres.sql @@ -11,6 +11,7 @@ WITH _s0 AS ( FROM tpch.orders ), _t2 AS ( SELECT + _s3.o_custkey, MAX(customer.c_acctbal) AS anything_c_acctbal, MAX(customer.c_phone) AS anything_c_phone, COUNT(*) AS n_rows @@ -21,7 +22,7 @@ WITH _s0 AS ( LEFT JOIN _s3 AS _s3 ON _s3.o_custkey = customer.c_custkey GROUP BY - _s3.o_custkey + 1 ) SELECT SUBSTRING(anything_c_phone FROM 1 FOR 2) AS CNTRY_CODE, @@ -29,7 +30,9 @@ 
SELECT COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL FROM _t2 WHERE - n_rows = 0 + ( + n_rows * CASE WHEN NOT o_custkey IS NULL THEN 1 ELSE 0 END + ) = 0 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_snowflake.sql b/tests/test_sql_refsols/tpch_q22_snowflake.sql index ccde6e030..8268a3f57 100644 --- a/tests/test_sql_refsols/tpch_q22_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q22_snowflake.sql @@ -11,6 +11,7 @@ WITH _s0 AS ( FROM tpch.orders ), _t2 AS ( SELECT + _s3.o_custkey, ANY_VALUE(customer.c_acctbal) AS anything_c_acctbal, ANY_VALUE(customer.c_phone) AS anything_c_phone, COUNT(*) AS n_rows @@ -21,7 +22,7 @@ WITH _s0 AS ( LEFT JOIN _s3 AS _s3 ON _s3.o_custkey = customer.c_custkey GROUP BY - _s3.o_custkey + 1 ) SELECT SUBSTRING(anything_c_phone, 1, 2) AS CNTRY_CODE, @@ -29,7 +30,9 @@ SELECT COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL FROM _t2 WHERE - n_rows = 0 + ( + n_rows * IFF(NOT o_custkey IS NULL, 1, 0) + ) = 0 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_sqlite.sql b/tests/test_sql_refsols/tpch_q22_sqlite.sql index f422529b1..fde501a05 100644 --- a/tests/test_sql_refsols/tpch_q22_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q22_sqlite.sql @@ -11,6 +11,7 @@ WITH _s0 AS ( FROM tpch.orders ), _t2 AS ( SELECT + _s3.o_custkey, MAX(customer.c_acctbal) AS anything_c_acctbal, MAX(customer.c_phone) AS anything_c_phone, COUNT(*) AS n_rows @@ -21,7 +22,7 @@ WITH _s0 AS ( LEFT JOIN _s3 AS _s3 ON _s3.o_custkey = customer.c_custkey GROUP BY - _s3.o_custkey + 1 ) SELECT SUBSTRING(anything_c_phone, 1, 2) AS CNTRY_CODE, @@ -29,7 +30,9 @@ SELECT COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL FROM _t2 WHERE - n_rows = 0 + ( + n_rows * IIF(NOT o_custkey IS NULL, 1, 0) + ) = 0 GROUP BY 1 ORDER BY From ff718317819bb06b14c060dea9e945378a05307b Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 21 Oct 2025 14:30:55 -0400 Subject: [PATCH 113/143] Revision to left case handling [RUN CI] --- demos/notebooks/4_tpch.ipynb 
| 2 +- .../conversion/join_aggregate_transpose.py | 16 ++- tests/test_plan_refsols/common_prefix_ag.txt | 72 +++++----- tests/test_plan_refsols/common_prefix_ah.txt | 10 +- tests/test_plan_refsols/common_prefix_ai.txt | 61 ++++---- tests/test_plan_refsols/common_prefix_aj.txt | 72 +++++----- tests/test_plan_refsols/common_prefix_ak.txt | 56 ++++---- tests/test_plan_refsols/common_prefix_an.txt | 25 ++-- tests/test_plan_refsols/common_prefix_ao.txt | 20 +-- tests/test_plan_refsols/common_prefix_u.txt | 8 +- tests/test_plan_refsols/common_prefix_x.txt | 8 +- tests/test_plan_refsols/common_prefix_y.txt | 19 ++- tests/test_plan_refsols/correl_1.txt | 8 +- tests/test_plan_refsols/correl_2.txt | 14 +- .../cryptbank_general_join_01_raw.txt | 14 +- .../cryptbank_general_join_01_rewrite.txt | 14 +- .../customer_largest_order_deltas.txt | 4 +- .../epoch_intra_season_searches.txt | 6 +- .../quantile_function_test_2.txt | 4 +- .../quantile_function_test_3.txt | 4 +- .../quantile_function_test_4.txt | 4 +- tests/test_plan_refsols/singular7.txt | 23 ++-- .../supplier_pct_national_qty.txt | 4 +- ...chnograph_country_combination_analysis.txt | 10 +- ...nograph_country_incident_rate_analysis.txt | 10 +- ..._error_rate_sun_set_by_factory_country.txt | 10 +- tests/test_plan_refsols/tpch_q21.txt | 6 +- tests/test_plan_refsols/tpch_q22.txt | 22 ++- .../window_filter_order_1.txt | 6 +- .../window_filter_order_2.txt | 6 +- .../window_filter_order_3.txt | 6 +- .../window_filter_order_8.txt | 6 +- .../window_filter_order_9.txt | 8 +- .../tpch_test_functions.py | 4 +- tests/test_qualification.py | 2 +- .../conditional_functions_ansi.sql | 2 +- .../conditional_functions_mysql.sql | 2 +- .../conditional_functions_postgres.sql | 2 +- .../conditional_functions_snowflake.sql | 2 +- .../conditional_functions_sqlite.sql | 2 +- tests/test_sql_refsols/correl_1_sqlite.sql | 25 ++-- tests/test_sql_refsols/correl_2_sqlite.sql | 33 ++--- .../cryptbank_general_join_01_raw_sqlite.sql | 130 
++++++++---------- ...yptbank_general_join_01_rewrite_sqlite.sql | 130 ++++++++---------- .../defog_broker_adv10_ansi.sql | 32 ++--- .../defog_broker_adv10_mysql.sql | 32 ++--- .../defog_broker_adv10_postgres.sql | 32 ++--- .../defog_broker_adv10_snowflake.sql | 30 ++-- .../defog_broker_adv10_sqlite.sql | 32 ++--- .../defog_dealership_adv3_ansi.sql | 27 ++-- .../defog_dealership_adv3_mysql.sql | 27 ++-- .../defog_dealership_adv3_postgres.sql | 27 ++-- .../defog_dealership_adv3_snowflake.sql | 27 ++-- .../defog_dealership_adv3_sqlite.sql | 27 ++-- .../defog_dealership_adv4_ansi.sql | 30 ++-- .../defog_dealership_adv4_mysql.sql | 30 ++-- .../defog_dealership_adv4_postgres.sql | 30 ++-- .../defog_dealership_adv4_snowflake.sql | 30 ++-- .../defog_dealership_adv4_sqlite.sql | 30 ++-- .../defog_dealership_adv6_ansi.sql | 2 +- .../defog_dealership_adv6_mysql.sql | 2 +- .../defog_dealership_adv6_postgres.sql | 2 +- .../defog_dealership_adv6_snowflake.sql | 2 +- .../defog_dealership_adv6_sqlite.sql | 2 +- .../defog_dermtreatment_adv5_ansi.sql | 2 +- .../defog_dermtreatment_adv5_mysql.sql | 2 +- .../defog_dermtreatment_adv5_postgres.sql | 2 +- .../defog_dermtreatment_adv5_snowflake.sql | 2 +- .../defog_dermtreatment_adv5_sqlite.sql | 2 +- .../defog_ewallet_adv12_ansi.sql | 2 +- .../defog_ewallet_adv12_mysql.sql | 2 +- .../defog_ewallet_adv12_postgres.sql | 2 +- .../defog_ewallet_adv12_snowflake.sql | 2 +- .../defog_ewallet_adv12_sqlite.sql | 2 +- .../defog_ewallet_basic10_ansi.sql | 23 ++-- .../defog_ewallet_basic10_mysql.sql | 23 ++-- .../defog_ewallet_basic10_postgres.sql | 23 ++-- .../defog_ewallet_basic10_snowflake.sql | 23 ++-- .../defog_ewallet_basic10_sqlite.sql | 23 ++-- .../defog_ewallet_basic8_ansi.sql | 2 +- .../defog_ewallet_basic8_mysql.sql | 2 +- .../defog_ewallet_basic8_postgres.sql | 2 +- .../defog_ewallet_basic8_snowflake.sql | 2 +- .../defog_ewallet_basic8_sqlite.sql | 2 +- .../defog_ewallet_gen4_ansi.sql | 10 +- .../defog_ewallet_gen4_mysql.sql | 10 +- 
.../defog_ewallet_gen4_postgres.sql | 10 +- .../defog_ewallet_gen4_snowflake.sql | 10 +- .../defog_ewallet_gen4_sqlite.sql | 10 +- .../epoch_intra_season_searches_ansi.sql | 13 +- .../epoch_intra_season_searches_mysql.sql | 13 +- .../epoch_intra_season_searches_postgres.sql | 19 +-- .../epoch_intra_season_searches_snowflake.sql | 13 +- .../epoch_intra_season_searches_sqlite.sql | 13 +- .../test_sql_refsols/quantile_test_2_ansi.sql | 2 +- .../quantile_test_2_mysql.sql | 4 +- .../quantile_test_2_postgres.sql | 2 +- .../quantile_test_2_snowflake.sql | 2 +- .../quantile_test_2_sqlite.sql | 4 +- ...raph_country_combination_analysis_ansi.sql | 9 +- ...aph_country_combination_analysis_mysql.sql | 9 +- ..._country_combination_analysis_postgres.sql | 9 +- ...country_combination_analysis_snowflake.sql | 9 +- ...ph_country_combination_analysis_sqlite.sql | 9 +- ...ph_country_incident_rate_analysis_ansi.sql | 9 +- ...h_country_incident_rate_analysis_mysql.sql | 9 +- ...ountry_incident_rate_analysis_postgres.sql | 9 +- ...untry_incident_rate_analysis_snowflake.sql | 9 +- ..._country_incident_rate_analysis_sqlite.sql | 12 +- ...r_rate_sun_set_by_factory_country_ansi.sql | 9 +- ..._rate_sun_set_by_factory_country_mysql.sql | 9 +- ...te_sun_set_by_factory_country_postgres.sql | 9 +- ...e_sun_set_by_factory_country_snowflake.sql | 9 +- ...rate_sun_set_by_factory_country_sqlite.sql | 9 +- tests/test_sql_refsols/tpch_q21_ansi.sql | 24 ++-- tests/test_sql_refsols/tpch_q21_mysql.sql | 24 ++-- tests/test_sql_refsols/tpch_q21_postgres.sql | 24 ++-- tests/test_sql_refsols/tpch_q21_snowflake.sql | 24 ++-- tests/test_sql_refsols/tpch_q21_sqlite.sql | 24 ++-- tests/test_sql_refsols/tpch_q22_ansi.sql | 33 ++--- tests/test_sql_refsols/tpch_q22_mysql.sql | 31 ++--- tests/test_sql_refsols/tpch_q22_postgres.sql | 31 ++--- tests/test_sql_refsols/tpch_q22_snowflake.sql | 31 ++--- tests/test_sql_refsols/tpch_q22_sqlite.sql | 31 ++--- tests/test_unqualified_node.py | 2 +- 125 files changed, 874 
insertions(+), 1168 deletions(-) diff --git a/demos/notebooks/4_tpch.ipynb b/demos/notebooks/4_tpch.ipynb index 78c46b90a..76d4f5ef2 100644 --- a/demos/notebooks/4_tpch.ipynb +++ b/demos/notebooks/4_tpch.ipynb @@ -1678,7 +1678,7 @@ " .CALCULATE(global_avg_balance=AVG(selected_customers.WHERE(account_balance > 0.0).account_balance))\n", " .customers\n", " .CALCULATE(cntry_code=phone[:2])\n", - " .WHERE(is_selected_code & (account_balance > global_avg_balance) & (COUNT(orders) == 0))\n", + " .WHERE(is_selected_code & (account_balance > global_avg_balance) & HASNOT(orders))\n", " .PARTITION(\n", " name=\"countries\",\n", " by=cntry_code,\n", diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index b7ee2d08a..89135e4cc 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -177,12 +177,13 @@ def join_aggregate_transpose( # Now that the transpose is deemed possible, if in the left join # scenario, transform any `COUNT(*)` calls into `COUNT(col)`, where # `col` is one of the aggregation keys. If this is not possible, then - # abort. + # abort. Also abort if any of the aggregation keys are not used as + # equi-join keys. if left_join_case and any( agg.op == pydop.COUNT and len(agg.inputs) == 0 for agg in aggregate.aggregations.values() ): - if len(agg_key_refs) == 0: + if (len(agg_key_refs) == 0) or (len(agg_key_refs) < len(aggregate.keys)): return None key_expr: RelationalExpression = aggregate.keys[agg_key_refs[0].name] new_call: CallExpression = CallExpression( @@ -269,6 +270,17 @@ def join_aggregate_transpose( # For each join key from the non-aggregate side, alter its substitution # to map it to the corresponding key from the aggregate side. 
for agg_key, non_agg_key in zip(agg_key_refs, non_agg_key_refs): + # If in the left join situation, also switch the aggregation key + # to point to the equivalent value from the non-aggregate side of + # the left join. + if left_join_case: + lhs_join_key_ref = join_sub[non_agg_key] + assert isinstance(lhs_join_key_ref, ColumnReference) + lhs_join_key_agg: CallExpression = new_aggregate_aggs[ + lhs_join_key_ref.name + ] + assert lhs_join_key_agg.op == pydop.ANYTHING + new_aggregate_keys[agg_key.name] = lhs_join_key_agg.inputs[0] join_sub[non_agg_key] = join_sub[agg_key] # TODO ADD COMMENTS diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index 049eef756..c059d3ebf 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(c_custkey), 1:numeric, 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & 
t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', count_c_custkey), ('n_machine_high_domestic_lines', sum_count_o_orderkey), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=count_c_custkey > 0:numeric & sum_count_o_orderkey > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'count_c_custkey': count_c_custkey, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_count_o_orderkey': sum_count_o_orderkey, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'count_c_custkey': COUNT(c_custkey), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_count_o_orderkey': SUM(count_o_orderkey), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,35 +10,33 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows * IFF(PRESENT(o_orderkey), 1:numeric, 0:numeric), 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 
'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': 
t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 
'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git 
a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index 56d52db6b..939276643 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(o_orderkey), 1:numeric, 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', sum_count_o_orderkey), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + 
FILTER(condition=sum_count_o_orderkey > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_count_o_orderkey': sum_count_o_orderkey, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_count_o_orderkey': SUM(count_o_orderkey), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index c1c1d47cd..43ac4679b 100644 --- 
a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,31 +1,30 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(c_custkey), 1:numeric, 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 
'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', count_c_custkey), 
('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) + FILTER(condition=count_c_custkey > 0:numeric, columns={'anything_n_name': anything_n_name, 'count_c_custkey': count_c_custkey, 'n_rows': ndistinct_c_custkey_0, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'ndistinct_c_custkey_0': NDISTINCT(c_custkey_0), 'sum_sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & 
t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 
'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index c2ae42100..0299a1186 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 
0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(c_custkey), 1:numeric, 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', count_c_custkey), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=count_c_custkey > 0:numeric & sum_count_o_orderkey > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'count_c_custkey': count_c_custkey, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'count_c_custkey': COUNT(c_custkey), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_count_o_orderkey': SUM(count_o_orderkey), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': 
COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,35 +10,33 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows * IFF(PRESENT(o_orderkey), 1:numeric, 0:numeric), 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 
'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - 
FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': 
t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 
'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 2a839c988..b10b08bfc 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows)], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(c_custkey), 1:numeric, 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'n_rows': t1.n_rows}) 
+ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', count_c_custkey), ('n_machine_high_domestic_lines', sum_count_o_orderkey)], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=count_c_custkey > 0:numeric & sum_count_o_orderkey > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'count_c_custkey': count_c_custkey, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_count_o_orderkey': sum_count_o_orderkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'count_c_custkey': COUNT(c_custkey), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_count_o_orderkey': SUM(count_o_orderkey)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': COUNT(o_orderkey)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,33 +10,31 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows * IFF(PRESENT(o_orderkey), 1:numeric, 0:numeric)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t1.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': 
t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 4873ae241..1c04c71f6 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('cust_key', anything_o_custkey), ('n_orders', n_rows * IFF(PRESENT(anything_o_custkey), 1:numeric, 0:numeric)), ('n_no_tax_discount', anything_n_rows_0)], orderings=[(anything_o_custkey):asc_first]) - FILTER(condition=n_rows * IFF(PRESENT(anything_o_custkey), 1:numeric, 0:numeric) > RELAVG(args=[n_rows * IFF(PRESENT(anything_o_custkey), 1:numeric, 0:numeric)], partition=[anything_c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'anything_n_rows_0': anything_n_rows_0, 'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'anything_c_nationkey': ANYTHING(c_nationkey), 'anything_n_rows_0': ANYTHING(n_rows_0), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'n_rows_0': t0.n_rows}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', count_anything_o_custkey), ('n_no_tax_discount', anything_n_rows_0)], orderings=[(c_custkey):asc_first]) + FILTER(condition=DEFAULT_TO(sum_count_l_orderkey, 0:numeric) > 0:numeric & count_anything_o_custkey > RELAVG(args=[count_anything_o_custkey], partition=[anything_c_nationkey], order=[]) & sum_count_l_orderkey > 0:numeric, columns={'anything_n_rows_0': anything_n_rows_0, 'c_custkey': c_custkey, 
'count_anything_o_custkey': count_anything_o_custkey}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_nationkey': ANYTHING(c_nationkey), 'anything_n_rows_0': ANYTHING(n_rows_0), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_count_l_orderkey': SUM(count_l_orderkey)}) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'count_l_orderkey': t1.count_l_orderkey, 'n_rows_0': t0.n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) @@ -10,12 +10,11 @@ ROOT(columns=[('cust_key', anything_o_custkey), ('n_orders', n_rows * IFF(PRESEN SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) - PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows * IFF(PRESENT(l_orderkey), 1:numeric, 0:numeric)}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.l_partkey == 
t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 76d840838..a22099028 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,19 +1,19 @@ -ROOT(columns=[('cust_key', o_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, 0:numeric)), ('n_no_tax_discount', n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)), 
('n_part_purchases', anything_sum_n_rows)], orderings=[(o_custkey):asc_first], limit=5:numeric) - FILTER(condition=n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) > 0:numeric & DEFAULT_TO(anything_n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(anything_n_rows, 0:numeric)], partition=[], order=[]), columns={'anything_n_rows': anything_n_rows, 'anything_sum_n_rows': anything_sum_n_rows, 'n_rows': n_rows, 'o_custkey': o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_n_rows': ANYTHING(n_rows), 'anything_sum_n_rows': ANYTHING(sum_n_rows), 'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey, 'sum_n_rows': t0.sum_n_rows}) - LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, 0:numeric)), ('n_no_tax_discount', count_o_custkey), ('n_part_purchases', anything_sum_count_l_orderkey)], orderings=[(c_custkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(anything_n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(anything_n_rows, 0:numeric)], partition=[], order=[]) & count_o_custkey > 0:numeric, columns={'anything_n_rows': anything_n_rows, 'anything_sum_count_l_orderkey': anything_sum_count_l_orderkey, 'c_custkey': c_custkey, 'count_o_custkey': count_o_custkey}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_n_rows': ANYTHING(n_rows), 'anything_sum_count_l_orderkey': ANYTHING(sum_count_l_orderkey), 'count_o_custkey': COUNT(o_custkey)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey, 'sum_count_l_orderkey': t0.sum_count_l_orderkey}) + LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_count_l_orderkey': sum_count_l_orderkey}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_count_l_orderkey': t1.sum_count_l_orderkey}) LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(l_orderkey), 1:numeric, 0:numeric))}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) + FILTER(condition=sum_count_l_orderkey > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows, 
'sum_count_l_orderkey': sum_count_l_orderkey}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_count_l_orderkey': SUM(count_l_orderkey)}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index 096c535f5..4222a39ef 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -5,10 +5,10 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:n SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'sum_n_rows': SUM(n_rows * IFF(PRESENT(l_orderkey), 1:numeric, 0:numeric)), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': 
COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_quantity': t1.l_quantity, 'o_custkey': t0.o_custkey}) + FILTER(condition=sum_count_l_orderkey > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'sum_count_l_orderkey': SUM(count_l_orderkey), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_quantity': t1.l_quantity, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index cdd9874a0..b6c733f43 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,10 +1,10 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - 
FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(l_orderkey), 1:numeric, 0:numeric))}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) + FILTER(condition=sum_count_l_orderkey > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_count_l_orderkey': SUM(count_l_orderkey)}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index 988bda8ed..a296ce8e7 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('name', anything_c_name), ('n_orders', n_rows * IFF(PRESENT(anything_o_custkey), 1:numeric, 0:numeric))], orderings=[(n_rows * IFF(PRESENT(anything_o_custkey), 1:numeric, 0:numeric)):desc_last, (anything_c_name):asc_first], limit=5:numeric) - 
FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'anything_c_name': anything_c_name, 'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) +ROOT(columns=[('name', anything_c_name), ('n_orders', count_anything_o_custkey)], orderings=[(count_anything_o_custkey):desc_last, (anything_c_name):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_count_l_orderkey, 0:numeric) == 0:numeric, columns={'anything_c_name': anything_c_name, 'count_anything_o_custkey': count_anything_o_custkey}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_count_l_orderkey': SUM(count_l_orderkey)}) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'count_l_orderkey': t1.count_l_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows * IFF(PRESENT(l_orderkey), 1:numeric, 0:numeric)}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - 
FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index b7c906649..352d9d69f 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('region_name', anything_r_name), ('n_prefix_nations', n_rows * IFF(PRESENT(n_regionkey), 1:numeric, 0:numeric))], orderings=[(anything_r_name):asc_first]) - AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'anything_r_name': ANYTHING(r_name), 'n_rows': COUNT()}) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('region_name', r_name), ('n_prefix_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_2.txt b/tests/test_plan_refsols/correl_2.txt index 46082d9d5..bf42b8cd6 100644 --- a/tests/test_plan_refsols/correl_2.txt +++ b/tests/test_plan_refsols/correl_2.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('name', anything_n_name), ('n_selected_custs', n_rows * IFF(PRESENT(c_nationkey), 1:numeric, 0:numeric))], orderings=[(anything_n_name):asc_first]) - AGGREGATE(keys={'c_nationkey': c_nationkey, 'expr_1': SLICE(c_comment, None:unknown, 1:numeric, None:unknown)}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT()}) - JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == SLICE(t1.c_comment, None:unknown, 1:numeric, None:unknown) & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_comment': t1.c_comment, 'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) - FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) +ROOT(columns=[('name', n_name), ('n_selected_custs', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(n_name):asc_first]) + 
JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == t1.expr_1 & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey, 'expr_1': SLICE(c_comment, None:unknown, 1:numeric, None:unknown)}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt index 790b89a6e..641e01b1c 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', sum_n_rows)], orderings=[]) - AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(b_key), 1:numeric, 0:numeric))}) - AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == UNMASK::((42 - ([t1.c_key]))), type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t1.b_key, 'c_key': t1.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == 
SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) +ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) + AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == t1.unmask_c_key, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) + AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': 
t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt index 790b89a6e..641e01b1c 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', sum_n_rows)], orderings=[]) - AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(b_key), 1:numeric, 0:numeric))}) - AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == UNMASK::((42 - ([t1.c_key]))), type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t1.b_key, 'c_key': t1.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) +ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) + AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == t1.unmask_c_key, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, 
-6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) + AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 0105b05f2..d9e8965b6 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -5,8 +5,8 @@ ROOT(columns=[('name', anything_c_name), ('largest_diff', IFF(ABS(min_revenue_de FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[anything_o_custkey], order=[(anything_o_orderdate):asc_last])), columns={'anything_o_custkey': anything_o_custkey, 'anything_o_orderdate': 
anything_o_orderdate, 'sum_r': sum_r}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'anything_o_orderdate': ANYTHING(o_orderdate), 'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'anything_o_orderdate': ANYTHING(o_orderdate), 'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_shipmode == 'AIR':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 0326f9228..a347a0e11 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,8 +1,8 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / 
DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(n_rows * IFF(PRESENT(search_id), 1:numeric, 0:numeric) > 0:numeric)}) - AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t1.s_name, 'search_id': t1.search_id}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(count_search_id > 0:numeric)}) + AGGREGATE(keys={'s_name': s_name, 'search_id_0': search_id_0}, aggregations={'count_search_id': COUNT(search_id)}) + JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_id_0': t0.search_id}) JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/quantile_function_test_2.txt b/tests/test_plan_refsols/quantile_function_test_2.txt index cf70ffe8c..7b87e14ce 100644 --- a/tests/test_plan_refsols/quantile_function_test_2.txt +++ 
b/tests/test_plan_refsols/quantile_function_test_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('region_name', anything_r_name), ('nation_name', anything_n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(anything_n_name):asc_first]) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name, 'o_totalprice': t1.o_totalprice, 'r_name': t0.r_name}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_totalprice': 
t1.o_totalprice, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/quantile_function_test_3.txt b/tests/test_plan_refsols/quantile_function_test_3.txt index cf70ffe8c..7b87e14ce 100644 --- a/tests/test_plan_refsols/quantile_function_test_3.txt +++ b/tests/test_plan_refsols/quantile_function_test_3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('region_name', anything_r_name), ('nation_name', anything_n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(anything_n_name):asc_first]) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name, 'o_totalprice': t1.o_totalprice, 'r_name': t0.r_name}) + AGGREGATE(keys={'n_nationkey': 
n_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_totalprice': t1.o_totalprice, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/quantile_function_test_4.txt b/tests/test_plan_refsols/quantile_function_test_4.txt index 453042d6a..dbcd07ede 100644 --- a/tests/test_plan_refsols/quantile_function_test_4.txt +++ b/tests/test_plan_refsols/quantile_function_test_4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('region_name', anything_r_name), ('nation_name', anything_n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(anything_n_name):asc_first]) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': 
QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name, 'o_totalprice': t1.o_totalprice, 'r_name': t0.r_name}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_totalprice': t1.o_totalprice, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/singular7.txt 
b/tests/test_plan_refsols/singular7.txt index 2a8715dbf..806e97eb3 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'n_orders': t1.n_orders, 's_name': t0.s_name}) +ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_orders', count_l_suppkey)], orderings=[(count_l_suppkey):desc_last, (s_name):asc_first], limit=5:numeric) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'count_l_suppkey': t1.count_l_suppkey, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - PROJECT(columns={'anything_p_name': anything_p_name, 'l_suppkey': l_suppkey, 'n_orders': n_rows * IFF(PRESENT(l_suppkey), 1:numeric, 0:numeric)}) - FILTER(condition=RANKING(args=[], partition=[l_suppkey], order=[(n_rows * IFF(PRESENT(l_suppkey), 1:numeric, 0:numeric)):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'l_suppkey': l_suppkey, 'n_rows': n_rows}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name}) - 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(count_l_suppkey):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'count_l_suppkey': count_l_suppkey, 'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'count_l_suppkey': COUNT(l_suppkey)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': 
l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index 1f1fd0b2e..9cb1bede4 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,6 +1,6 @@ ROOT(columns=[('supplier_name', anything_s_name), ('nation_name', anything_n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[anything_s_nationkey], order=[]))], orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[anything_s_nationkey], order=[])):desc_last], limit=5:numeric) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_s_name': ANYTHING(s_name), 'anything_s_nationkey': ANYTHING(s_nationkey), 'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) + AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_s_name': ANYTHING(s_name), 'anything_s_nationkey': ANYTHING(s_nationkey), 'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index 3e2f698b4..9404df1e1 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,11 +1,11 @@ -ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) - JOIN(condition=t0.co_id == t1.anything_co_id & t0._id_1 == t1.anything__id_3, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) +ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(DEFAULT_TO(sum_count_in_device_id, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_count_in_device_id, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) + JOIN(condition=t0.co_id == t1.anything_co_id & t0._id_1 == t1.anything__id_3, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_count_in_device_id': t1.sum_count_in_device_id}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(in_device_id), 1:numeric, 0:numeric))}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'anything__id_3': ANYTHING(_id_3), 'anything_co_id': ANYTHING(co_id), 'n_rows': COUNT()}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'in_device_id': t1.in_device_id}) + AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_count_in_device_id': SUM(count_in_device_id)}) + AGGREGATE(keys={'de_id': de_id}, aggregations={'anything__id_3': ANYTHING(_id_3), 'anything_co_id': ANYTHING(co_id), 'count_in_device_id': COUNT(in_device_id)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t0.de_id, 'in_device_id': t1.in_device_id}) JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index b883522be..50da2a50f 100644 --- 
a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.anything_us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) +ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(sum_count_in_device_id, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) + JOIN(condition=t0.co_id == t1.anything_us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_count_in_device_id': t1.sum_count_in_device_id, 'sum_n_rows': t0.sum_n_rows}) JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=main.COUNTRIES, columns={'co_id': 
co_id, 'co_name': co_name}) @@ -13,9 +13,9 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) - AGGREGATE(keys={'anything_us_country_id': anything_us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows * IFF(PRESENT(in_device_id), 1:numeric, 0:numeric))}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'anything_us_country_id': ANYTHING(us_country_id), 'n_rows': COUNT()}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'in_device_id': t1.in_device_id, 'us_country_id': t0.us_country_id}) + AGGREGATE(keys={'anything_us_country_id': anything_us_country_id}, aggregations={'n_rows': COUNT(), 'sum_count_in_device_id': SUM(count_in_device_id)}) + AGGREGATE(keys={'de_id': de_id}, aggregations={'anything_us_country_id': ANYTHING(us_country_id), 'count_in_device_id': COUNT(in_device_id)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'in_device_id': t1.in_device_id, 'us_country_id': t0.us_country_id}) JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 4d5557ea3..24d59bb6e 100644 --- 
a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.anything_de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) +ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_count_in_device_id, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) + JOIN(condition=t0.co_id == t1.anything_de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_count_in_device_id': t1.sum_count_in_device_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'anything_de_production_country_id': anything_de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows * IFF(PRESENT(in_device_id), 1:numeric, 0:numeric))}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'anything_de_production_country_id': ANYTHING(de_production_country_id), 'n_rows': COUNT()}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'in_device_id': t1.in_device_id}) + AGGREGATE(keys={'anything_de_production_country_id': anything_de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_count_in_device_id': SUM(count_in_device_id)}) + AGGREGATE(keys={'de_id': de_id}, aggregations={'anything_de_production_country_id': ANYTHING(de_production_country_id), 'count_in_device_id': 
COUNT(in_device_id)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id, 'in_device_id': t1.in_device_id}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index dd2e4a7e7..0085817db 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', n_rows * IFF(PRESENT(anything_l_suppkey), 1:numeric, 0:numeric))], orderings=[(n_rows * IFF(PRESENT(anything_l_suppkey), 1:numeric, 0:numeric)):desc_last, (anything_s_name):asc_first], limit=10:numeric) - AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT()}) - JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t1.anything_l_suppkey, 's_name': t0.s_name}) +ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', count_anything_l_suppkey)], orderings=[(count_anything_l_suppkey):desc_last, (anything_s_name):asc_first], limit=10:numeric) + AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'count_anything_l_suppkey': COUNT(anything_l_suppkey)}) + JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': 
t1.anything_l_suppkey, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 278a1453a..076214713 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,12 +1,10 @@ -ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_anything_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) - AGGREGATE(keys={'cntry_code': SLICE(anything_c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_anything_c_acctbal': SUM(anything_c_acctbal)}) - FILTER(condition=n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) == 0:numeric, columns={'anything_c_acctbal': anything_c_acctbal, 'anything_c_phone': anything_c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_phone': ANYTHING(c_phone), 'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'o_custkey': t1.o_custkey}) - JOIN(condition=t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) - AGGREGATE(keys={}, aggregations={'avg_c_acctbal': AVG(c_acctbal)}) - FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', 
'17']:array[unknown]), columns={'c_acctbal': c_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - FILTER(condition=ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) + AGGREGATE(keys={'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone}) + JOIN(condition=t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) + AGGREGATE(keys={}, aggregations={'avg_c_acctbal': AVG(c_acctbal)}) + FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + FILTER(condition=ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_1.txt 
b/tests/test_plan_refsols/window_filter_order_1.txt index 92480d257..7d3ebc83a 100644 --- a/tests/test_plan_refsols/window_filter_order_1.txt +++ b/tests/test_plan_refsols/window_filter_order_1.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) < RELAVG(args=[n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)], partition=[], order=[]) & n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) > 0:numeric, columns={}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey}) + FILTER(condition=count_o_custkey < RELAVG(args=[count_o_custkey], partition=[], order=[]) & count_o_custkey > 0:numeric, columns={}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_2.txt b/tests/test_plan_refsols/window_filter_order_2.txt index 92480d257..7d3ebc83a 100644 --- a/tests/test_plan_refsols/window_filter_order_2.txt +++ b/tests/test_plan_refsols/window_filter_order_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) < 
RELAVG(args=[n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)], partition=[], order=[]) & n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) > 0:numeric, columns={}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey}) + FILTER(condition=count_o_custkey < RELAVG(args=[count_o_custkey], partition=[], order=[]) & count_o_custkey > 0:numeric, columns={}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_3.txt b/tests/test_plan_refsols/window_filter_order_3.txt index 92480d257..7d3ebc83a 100644 --- a/tests/test_plan_refsols/window_filter_order_3.txt +++ b/tests/test_plan_refsols/window_filter_order_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) < RELAVG(args=[n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)], partition=[], order=[]) & n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric) > 0:numeric, columns={}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'o_custkey': t1.o_custkey}) + FILTER(condition=count_o_custkey < RELAVG(args=[count_o_custkey], partition=[], order=[]) & count_o_custkey > 0:numeric, columns={}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt index 9ea60e8f5..7b1314906 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ABSENT(n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)) & anything_c_acctbal < RELSUM(args=[n_rows * IFF(PRESENT(o_custkey), 1:numeric, 0:numeric)], partition=[], order=[]), columns={}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'o_custkey': t1.o_custkey}) + FILTER(condition=ABSENT(count_o_custkey) & anything_c_acctbal < RELSUM(args=[count_o_custkey], partition=[], order=[]), columns={}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'count_o_custkey': COUNT(o_custkey)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, 
type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_9.txt b/tests/test_plan_refsols/window_filter_order_9.txt index 2b6b54e83..13abfb05d 100644 --- a/tests/test_plan_refsols/window_filter_order_9.txt +++ b/tests/test_plan_refsols/window_filter_order_9.txt @@ -1,12 +1,12 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) FILTER(condition=ABSENT(expr_0) & o_totalprice < 0.05:numeric * RELAVG(args=[total_spent], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'expr_0': t1.expr_0, 'o_totalprice': t0.o_totalprice, 'total_spent': t1.total_spent}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'expr_0': t1.expr_0, 'o_totalprice': t0.o_totalprice, 'total_spent': t1.total_spent}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - PROJECT(columns={'expr_0': 1:numeric, 'o_custkey': o_custkey, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'o_totalprice': t1.o_totalprice}) + PROJECT(columns={'c_custkey': c_custkey, 'expr_0': 1:numeric, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_totalprice': t1.o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_pydough_functions/tpch_test_functions.py b/tests/test_pydough_functions/tpch_test_functions.py index f3a9d689e..3acb740e0 100644 --- a/tests/test_pydough_functions/tpch_test_functions.py +++ b/tests/test_pydough_functions/tpch_test_functions.py @@ -526,9 +526,7 @@ def impl_tpch_q22(): ) .customers.CALCULATE(cntry_code=phone[:2]) .WHERE( - is_selected_code - & (account_balance > global_avg_balance) - & (COUNT(orders) == 0) + is_selected_code & (account_balance > global_avg_balance) & HASNOT(orders) ) .PARTITION( name="countries", diff --git a/tests/test_qualification.py b/tests/test_qualification.py index 4b2f1d790..d6938851c 100644 --- a/tests/test_qualification.py +++ b/tests/test_qualification.py @@ -544,7 +544,7 @@ │ └─┬─ AccessChild │ ├─── TableCollection[customers] │ ├─── Calculate[cntry_code=SLICE(phone, None, 2, None)] - │ └─┬─ Where[ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']) & (account_balance > global_avg_balance) & (COUNT($1) == 0)] + │ └─┬─ Where[ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']) & (account_balance > global_avg_balance) & HASNOT($1)] │ └─┬─ AccessChild │ └─── SubCollection[orders] ├─┬─ 
Calculate[CNTRY_CODE=cntry_code, NUM_CUSTS=COUNT($1), TOTACCTBAL=SUM($1.account_balance)] diff --git a/tests/test_sql_refsols/conditional_functions_ansi.sql b/tests/test_sql_refsols/conditional_functions_ansi.sql index a2358fbe5..fa71725a1 100644 --- a/tests/test_sql_refsols/conditional_functions_ansi.sql +++ b/tests/test_sql_refsols/conditional_functions_ansi.sql @@ -21,4 +21,4 @@ LEFT JOIN _s1 AS _s1 WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 GROUP BY - _s1.o_custkey + customer.c_custkey diff --git a/tests/test_sql_refsols/conditional_functions_mysql.sql b/tests/test_sql_refsols/conditional_functions_mysql.sql index 664859f62..3aabbf468 100644 --- a/tests/test_sql_refsols/conditional_functions_mysql.sql +++ b/tests/test_sql_refsols/conditional_functions_mysql.sql @@ -21,4 +21,4 @@ LEFT JOIN _s1 AS _s1 WHERE CUSTOMER.c_acctbal <= 1000 AND CUSTOMER.c_acctbal >= 100 GROUP BY - _s1.o_custkey + CUSTOMER.c_custkey diff --git a/tests/test_sql_refsols/conditional_functions_postgres.sql b/tests/test_sql_refsols/conditional_functions_postgres.sql index 2b912c9d7..bd9ee0f8a 100644 --- a/tests/test_sql_refsols/conditional_functions_postgres.sql +++ b/tests/test_sql_refsols/conditional_functions_postgres.sql @@ -17,4 +17,4 @@ LEFT JOIN _s1 AS _s1 WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 GROUP BY - _s1.o_custkey + customer.c_custkey diff --git a/tests/test_sql_refsols/conditional_functions_snowflake.sql b/tests/test_sql_refsols/conditional_functions_snowflake.sql index 8b6df1849..56a55ff92 100644 --- a/tests/test_sql_refsols/conditional_functions_snowflake.sql +++ b/tests/test_sql_refsols/conditional_functions_snowflake.sql @@ -21,4 +21,4 @@ LEFT JOIN _s1 AS _s1 WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 GROUP BY - _s1.o_custkey + customer.c_custkey diff --git a/tests/test_sql_refsols/conditional_functions_sqlite.sql b/tests/test_sql_refsols/conditional_functions_sqlite.sql index 49b3424a8..e4a5e7f45 100644 --- 
a/tests/test_sql_refsols/conditional_functions_sqlite.sql +++ b/tests/test_sql_refsols/conditional_functions_sqlite.sql @@ -17,4 +17,4 @@ LEFT JOIN _s1 AS _s1 WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 GROUP BY - _s1.o_custkey + customer.c_custkey diff --git a/tests/test_sql_refsols/correl_1_sqlite.sql b/tests/test_sql_refsols/correl_1_sqlite.sql index 3491077be..036dcf89c 100644 --- a/tests/test_sql_refsols/correl_1_sqlite.sql +++ b/tests/test_sql_refsols/correl_1_sqlite.sql @@ -1,24 +1,19 @@ WITH _s1 AS ( SELECT - n_name, - n_regionkey - FROM tpch.nation -), _t0 AS ( - SELECT - _s1.n_regionkey, - MAX(region.r_name) AS anything_r_name, + SUBSTRING(n_name, 1, 1) AS expr_1, + n_regionkey, COUNT(*) AS n_rows - FROM tpch.region AS region - LEFT JOIN _s1 AS _s1 - ON SUBSTRING(_s1.n_name, 1, 1) = SUBSTRING(region.r_name, 1, 1) - AND _s1.n_regionkey = region.r_regionkey + FROM tpch.nation GROUP BY 1, - SUBSTRING(_s1.n_name, 1, 1) + 2 ) SELECT - anything_r_name AS region_name, - n_rows * IIF(NOT n_regionkey IS NULL, 1, 0) AS n_prefix_nations -FROM _t0 + region.r_name AS region_name, + COALESCE(_s1.n_rows, 0) AS n_prefix_nations +FROM tpch.region AS region +LEFT JOIN _s1 AS _s1 + ON _s1.expr_1 = SUBSTRING(region.r_name, 1, 1) + AND _s1.n_regionkey = region.r_regionkey ORDER BY 1 diff --git a/tests/test_sql_refsols/correl_2_sqlite.sql b/tests/test_sql_refsols/correl_2_sqlite.sql index 6fc203eb2..877197ded 100644 --- a/tests/test_sql_refsols/correl_2_sqlite.sql +++ b/tests/test_sql_refsols/correl_2_sqlite.sql @@ -1,28 +1,23 @@ WITH _s3 AS ( SELECT - c_comment, - c_nationkey - FROM tpch.customer -), _t0 AS ( - SELECT - _s3.c_nationkey, - MAX(nation.n_name) AS anything_n_name, + SUBSTRING(c_comment, 1, 1) AS expr_1, + c_nationkey, COUNT(*) AS n_rows - FROM tpch.region AS region - JOIN tpch.nation AS nation - ON nation.n_regionkey = region.r_regionkey - LEFT JOIN _s3 AS _s3 - ON LOWER(SUBSTRING(region.r_name, 1, 1)) = SUBSTRING(_s3.c_comment, 1, 1) - AND 
_s3.c_nationkey = nation.n_nationkey - WHERE - NOT region.r_name LIKE 'A%' + FROM tpch.customer GROUP BY 1, - SUBSTRING(_s3.c_comment, 1, 1) + 2 ) SELECT - anything_n_name AS name, - n_rows * IIF(NOT c_nationkey IS NULL, 1, 0) AS n_selected_custs -FROM _t0 + nation.n_name AS name, + COALESCE(_s3.n_rows, 0) AS n_selected_custs +FROM tpch.region AS region +JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey +LEFT JOIN _s3 AS _s3 + ON _s3.c_nationkey = nation.n_nationkey + AND _s3.expr_1 = LOWER(SUBSTRING(region.r_name, 1, 1)) +WHERE + NOT region.r_name LIKE 'A%' ORDER BY 1 diff --git a/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql index ab853bfb9..eef2a3632 100644 --- a/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_general_join_01_raw_sqlite.sql @@ -10,8 +10,13 @@ WITH _s0 AS ( FROM crbnk.customers ), _s7 AS ( SELECT + ( + 42 - ( + _s3.c_key + ) + ) AS unmask_c_key, _s2.b_key, - _s3.c_key + COUNT(*) AS n_rows FROM _s0 AS _s2 JOIN _s1 AS _s3 ON SUBSTRING( @@ -75,15 +80,35 @@ WITH _s0 AS ( ON _s2.b_key = accounts.a_branchkey AND accounts.a_custkey = ( 42 - _s3.c_key ) -), _t0 AS ( - SELECT - _s7.b_key, - COUNT(*) AS n_rows - FROM _s0 AS _s0 - JOIN _s1 AS _s1 - ON SUBSTRING( - SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1), - CASE + GROUP BY + 1, + 2 +) +SELECT + _s0.b_key AS branch_key, + COUNT(*) AS n_local_cust, + COALESCE(SUM(_s7.n_rows), 0) AS n_local_cust_local_acct +FROM _s0 AS _s0 +JOIN _s1 AS _s1 + ON SUBSTRING( + SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1), + CASE + WHEN ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 + ) < 1 + THEN 1 + ELSE ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 + ) + END, + CASE + WHEN ( + LENGTH(SUBSTRING(_s1.c_addr, 
-1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 + ) < 1 + THEN 0 + ELSE ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 + ) - CASE WHEN ( LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 ) < 1 @@ -91,27 +116,27 @@ WITH _s0 AS ( ELSE ( LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 ) - END, - CASE - WHEN ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 - ) < 1 - THEN 0 - ELSE ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 - ) - CASE - WHEN ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 - ) < 1 - THEN 1 - ELSE ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 - ) - END END - ) = SUBSTRING( - _s0.b_addr, - CASE + END + ) = SUBSTRING( + _s0.b_addr, + CASE + WHEN ( + LENGTH(_s0.b_addr) + -7 + ) < 1 + THEN 1 + ELSE ( + LENGTH(_s0.b_addr) + -7 + ) + END, + CASE + WHEN ( + LENGTH(_s0.b_addr) + -5 + ) < 1 + THEN 0 + ELSE ( + LENGTH(_s0.b_addr) + -5 + ) - CASE WHEN ( LENGTH(_s0.b_addr) + -7 ) < 1 @@ -119,45 +144,12 @@ WITH _s0 AS ( ELSE ( LENGTH(_s0.b_addr) + -7 ) - END, - CASE - WHEN ( - LENGTH(_s0.b_addr) + -5 - ) < 1 - THEN 0 - ELSE ( - LENGTH(_s0.b_addr) + -5 - ) - CASE - WHEN ( - LENGTH(_s0.b_addr) + -7 - ) < 1 - THEN 1 - ELSE ( - LENGTH(_s0.b_addr) + -7 - ) - END END - ) - LEFT JOIN _s7 AS _s7 - ON ( - 42 - _s1.c_key - ) = ( - 42 - _s7.c_key - ) AND _s0.b_key = _s7.b_key - GROUP BY - 1, - ( - 42 - ( - _s7.c_key - ) - ) -) -SELECT - b_key AS branch_key, - COUNT(*) AS n_local_cust, - SUM(n_rows * IIF(NOT ( - b_key IS NULL - ), 1, 0)) AS n_local_cust_local_acct -FROM _t0 + END + ) +LEFT JOIN _s7 AS _s7 + ON _s0.b_key = _s7.b_key AND _s7.unmask_c_key = ( + 42 - _s1.c_key + ) GROUP BY 1 diff --git 
a/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql index ab853bfb9..eef2a3632 100644 --- a/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_general_join_01_rewrite_sqlite.sql @@ -10,8 +10,13 @@ WITH _s0 AS ( FROM crbnk.customers ), _s7 AS ( SELECT + ( + 42 - ( + _s3.c_key + ) + ) AS unmask_c_key, _s2.b_key, - _s3.c_key + COUNT(*) AS n_rows FROM _s0 AS _s2 JOIN _s1 AS _s3 ON SUBSTRING( @@ -75,15 +80,35 @@ WITH _s0 AS ( ON _s2.b_key = accounts.a_branchkey AND accounts.a_custkey = ( 42 - _s3.c_key ) -), _t0 AS ( - SELECT - _s7.b_key, - COUNT(*) AS n_rows - FROM _s0 AS _s0 - JOIN _s1 AS _s1 - ON SUBSTRING( - SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1), - CASE + GROUP BY + 1, + 2 +) +SELECT + _s0.b_key AS branch_key, + COUNT(*) AS n_local_cust, + COALESCE(SUM(_s7.n_rows), 0) AS n_local_cust_local_acct +FROM _s0 AS _s0 +JOIN _s1 AS _s1 + ON SUBSTRING( + SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1), + CASE + WHEN ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 + ) < 1 + THEN 1 + ELSE ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 + ) + END, + CASE + WHEN ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 + ) < 1 + THEN 0 + ELSE ( + LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 + ) - CASE WHEN ( LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 ) < 1 @@ -91,27 +116,27 @@ WITH _s0 AS ( ELSE ( LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 ) - END, - CASE - WHEN ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 - ) < 1 - THEN 0 - ELSE ( - 
LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -5 - ) - CASE - WHEN ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 - ) < 1 - THEN 1 - ELSE ( - LENGTH(SUBSTRING(_s1.c_addr, -1) || SUBSTRING(_s1.c_addr, 1, LENGTH(_s1.c_addr) - 1)) + -7 - ) - END END - ) = SUBSTRING( - _s0.b_addr, - CASE + END + ) = SUBSTRING( + _s0.b_addr, + CASE + WHEN ( + LENGTH(_s0.b_addr) + -7 + ) < 1 + THEN 1 + ELSE ( + LENGTH(_s0.b_addr) + -7 + ) + END, + CASE + WHEN ( + LENGTH(_s0.b_addr) + -5 + ) < 1 + THEN 0 + ELSE ( + LENGTH(_s0.b_addr) + -5 + ) - CASE WHEN ( LENGTH(_s0.b_addr) + -7 ) < 1 @@ -119,45 +144,12 @@ WITH _s0 AS ( ELSE ( LENGTH(_s0.b_addr) + -7 ) - END, - CASE - WHEN ( - LENGTH(_s0.b_addr) + -5 - ) < 1 - THEN 0 - ELSE ( - LENGTH(_s0.b_addr) + -5 - ) - CASE - WHEN ( - LENGTH(_s0.b_addr) + -7 - ) < 1 - THEN 1 - ELSE ( - LENGTH(_s0.b_addr) + -7 - ) - END END - ) - LEFT JOIN _s7 AS _s7 - ON ( - 42 - _s1.c_key - ) = ( - 42 - _s7.c_key - ) AND _s0.b_key = _s7.b_key - GROUP BY - 1, - ( - 42 - ( - _s7.c_key - ) - ) -) -SELECT - b_key AS branch_key, - COUNT(*) AS n_local_cust, - SUM(n_rows * IIF(NOT ( - b_key IS NULL - ), 1, 0)) AS n_local_cust_local_acct -FROM _t0 + END + ) +LEFT JOIN _s7 AS _s7 + ON _s0.b_key = _s7.b_key AND _s7.unmask_c_key = ( + 42 - _s1.c_key + ) GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_ansi.sql b/tests/test_sql_refsols/defog_broker_adv10_ansi.sql index 7b1d941c0..be57d4b91 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_ansi.sql @@ -1,28 +1,24 @@ WITH _s1 AS ( SELECT + EXTRACT(MONTH FROM CAST(sbtxdatetime AS DATETIME)) AS month_sbtxdatetime, + EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) AS year_sbtxdatetime, sbtxcustid, - sbtxdatetime - FROM main.sbtransaction -), _t0 AS ( - SELECT - _s1.sbtxcustid, - ANY_VALUE(sbcustomer.sbcustname) AS anything_sbcustname, COUNT(*) AS n_rows - FROM 
main.sbcustomer AS sbcustomer - LEFT JOIN _s1 AS _s1 - ON EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS DATETIME)) = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) - AND EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS DATETIME)) = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) - AND _s1.sbtxcustid = sbcustomer.sbcustid + FROM main.sbtransaction GROUP BY - EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS DATETIME)), - EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS DATETIME)), - 1 + 1, + 2, + 3 ) SELECT - sbtxcustid AS _id, - anything_sbcustname AS name, - n_rows * CASE WHEN NOT sbtxcustid IS NULL THEN 1 ELSE 0 END AS num_transactions -FROM _t0 + sbcustomer.sbcustid AS _id, + sbcustomer.sbcustname AS name, + COALESCE(_s1.n_rows, 0) AS num_transactions +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.month_sbtxdatetime = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) + AND _s1.sbtxcustid = sbcustomer.sbcustid + AND _s1.year_sbtxdatetime = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS DATETIME)) ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_mysql.sql b/tests/test_sql_refsols/defog_broker_adv10_mysql.sql index 2d9dd038a..7f2e42798 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_mysql.sql @@ -1,28 +1,24 @@ WITH _s1 AS ( SELECT + EXTRACT(MONTH FROM CAST(sbtxdatetime AS DATETIME)) AS month_sbTxDateTime, + EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) AS year_sbTxDateTime, sbtxcustid AS sbTxCustId, - sbtxdatetime AS sbTxDateTime - FROM main.sbTransaction -), _t0 AS ( - SELECT - _s1.sbTxCustId, - ANY_VALUE(sbCustomer.sbcustname) AS anything_sbCustName, COUNT(*) AS n_rows - FROM main.sbCustomer AS sbCustomer - LEFT JOIN _s1 AS _s1 - ON EXTRACT(MONTH FROM CAST(_s1.sbTxDateTime AS DATETIME)) = EXTRACT(MONTH FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) - AND EXTRACT(YEAR FROM CAST(_s1.sbTxDateTime AS DATETIME)) = 
EXTRACT(YEAR FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) - AND _s1.sbTxCustId = sbCustomer.sbcustid + FROM main.sbTransaction GROUP BY - EXTRACT(MONTH FROM CAST(_s1.sbTxDateTime AS DATETIME)), - EXTRACT(YEAR FROM CAST(_s1.sbTxDateTime AS DATETIME)), - 1 + 1, + 2, + 3 ) SELECT - sbTxCustId AS _id, - anything_sbCustName AS name, - n_rows * CASE WHEN NOT sbTxCustId IS NULL THEN 1 ELSE 0 END AS num_transactions -FROM _t0 + sbCustomer.sbcustid AS _id, + sbCustomer.sbcustname AS name, + COALESCE(_s1.n_rows, 0) AS num_transactions +FROM main.sbCustomer AS sbCustomer +LEFT JOIN _s1 AS _s1 + ON _s1.month_sbTxDateTime = EXTRACT(MONTH FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) + AND _s1.sbTxCustId = sbCustomer.sbcustid + AND _s1.year_sbTxDateTime = EXTRACT(YEAR FROM CAST(sbCustomer.sbcustjoindate AS DATETIME)) ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_postgres.sql b/tests/test_sql_refsols/defog_broker_adv10_postgres.sql index 546ff5aee..da91d43e9 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_postgres.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_postgres.sql @@ -1,28 +1,24 @@ WITH _s1 AS ( SELECT + EXTRACT(MONTH FROM CAST(sbtxdatetime AS TIMESTAMP)) AS month_sbtxdatetime, + EXTRACT(YEAR FROM CAST(sbtxdatetime AS TIMESTAMP)) AS year_sbtxdatetime, sbtxcustid, - sbtxdatetime - FROM main.sbtransaction -), _t0 AS ( - SELECT - _s1.sbtxcustid, - MAX(sbcustomer.sbcustname) AS anything_sbcustname, COUNT(*) AS n_rows - FROM main.sbcustomer AS sbcustomer - LEFT JOIN _s1 AS _s1 - ON EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)) = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) - AND EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)) = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) - AND _s1.sbtxcustid = sbcustomer.sbcustid + FROM main.sbtransaction GROUP BY - EXTRACT(MONTH FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)), - EXTRACT(YEAR FROM CAST(_s1.sbtxdatetime AS TIMESTAMP)), - 1 
+ 1, + 2, + 3 ) SELECT - sbtxcustid AS _id, - anything_sbcustname AS name, - n_rows * CASE WHEN NOT sbtxcustid IS NULL THEN 1 ELSE 0 END AS num_transactions -FROM _t0 + sbcustomer.sbcustid AS _id, + sbcustomer.sbcustname AS name, + COALESCE(_s1.n_rows, 0) AS num_transactions +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.month_sbtxdatetime = EXTRACT(MONTH FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND _s1.sbtxcustid = sbcustomer.sbcustid + AND _s1.year_sbtxdatetime = EXTRACT(YEAR FROM CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) ORDER BY 3 DESC NULLS LAST LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql index b03ae01b8..908f8925f 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql @@ -1,28 +1,24 @@ WITH _s1 AS ( SELECT + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) AS month_sbtxdatetime, + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) AS year_sbtxdatetime, sbtxcustid, - sbtxdatetime - FROM main.sbtransaction -), _t0 AS ( - SELECT - _s1.sbtxcustid, - ANY_VALUE(sbcustomer.sbcustname) AS anything_sbcustname, COUNT(*) AS n_rows - FROM main.sbcustomer AS sbcustomer - LEFT JOIN _s1 AS _s1 - ON MONTH(CAST(_s1.sbtxdatetime AS TIMESTAMP)) = MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) - AND YEAR(CAST(_s1.sbtxdatetime AS TIMESTAMP)) = YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) - AND _s1.sbtxcustid = sbcustomer.sbcustid + FROM main.sbtransaction GROUP BY 1, - MONTH(CAST(_s1.sbtxdatetime AS TIMESTAMP)), - YEAR(CAST(_s1.sbtxdatetime AS TIMESTAMP)) + 2, + 3 ) SELECT - sbtxcustid AS _id, - anything_sbcustname AS name, - n_rows * IFF(NOT sbtxcustid IS NULL, 1, 0) AS num_transactions -FROM _t0 + sbcustomer.sbcustid AS _id, + sbcustomer.sbcustname AS name, + COALESCE(_s1.n_rows, 0) AS num_transactions +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.month_sbtxdatetime = 
MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND _s1.sbtxcustid = sbcustomer.sbcustid + AND _s1.year_sbtxdatetime = YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) ORDER BY 3 DESC NULLS LAST LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql index 54a7bab50..3d0abe9d0 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql @@ -1,28 +1,24 @@ WITH _s1 AS ( SELECT + CAST(STRFTIME('%m', sbtxdatetime) AS INTEGER) AS month_sbtxdatetime, + CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS year_sbtxdatetime, sbtxcustid, - sbtxdatetime - FROM main.sbtransaction -), _t0 AS ( - SELECT - _s1.sbtxcustid, - MAX(sbcustomer.sbcustname) AS anything_sbcustname, COUNT(*) AS n_rows - FROM main.sbcustomer AS sbcustomer - LEFT JOIN _s1 AS _s1 - ON CAST(STRFTIME('%Y', _s1.sbtxdatetime) AS INTEGER) = CAST(STRFTIME('%Y', sbcustomer.sbcustjoindate) AS INTEGER) - AND CAST(STRFTIME('%m', _s1.sbtxdatetime) AS INTEGER) = CAST(STRFTIME('%m', sbcustomer.sbcustjoindate) AS INTEGER) - AND _s1.sbtxcustid = sbcustomer.sbcustid + FROM main.sbtransaction GROUP BY - CAST(STRFTIME('%Y', _s1.sbtxdatetime) AS INTEGER), - CAST(STRFTIME('%m', _s1.sbtxdatetime) AS INTEGER), - 1 + 1, + 2, + 3 ) SELECT - sbtxcustid AS _id, - anything_sbcustname AS name, - n_rows * IIF(NOT sbtxcustid IS NULL, 1, 0) AS num_transactions -FROM _t0 + sbcustomer.sbcustid AS _id, + sbcustomer.sbcustname AS name, + COALESCE(_s1.n_rows, 0) AS num_transactions +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.month_sbtxdatetime = CAST(STRFTIME('%m', sbcustomer.sbcustjoindate) AS INTEGER) + AND _s1.sbtxcustid = sbcustomer.sbcustid + AND _s1.year_sbtxdatetime = CAST(STRFTIME('%Y', sbcustomer.sbcustjoindate) AS INTEGER) ORDER BY 3 DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql 
index ca99bf581..a9792054c 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql @@ -2,22 +2,15 @@ WITH _s1 AS ( SELECT car_id FROM main.sales -), _t0 AS ( - SELECT - _s1.car_id, - ANY_VALUE(cars.make) AS anything_make, - ANY_VALUE(cars.model) AS anything_model, - COUNT(*) AS n_rows - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - WHERE - LOWER(cars.vin_number) LIKE '%m5%' - GROUP BY - 1 ) SELECT - anything_make AS make, - anything_model AS model, - n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales -FROM _t0 + ANY_VALUE(cars.make) AS make, + ANY_VALUE(cars.model) AS model, + COUNT(_s1.car_id) AS num_sales +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + LOWER(cars.vin_number) LIKE '%m5%' +GROUP BY + cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql index ca99bf581..a9792054c 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql @@ -2,22 +2,15 @@ WITH _s1 AS ( SELECT car_id FROM main.sales -), _t0 AS ( - SELECT - _s1.car_id, - ANY_VALUE(cars.make) AS anything_make, - ANY_VALUE(cars.model) AS anything_model, - COUNT(*) AS n_rows - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - WHERE - LOWER(cars.vin_number) LIKE '%m5%' - GROUP BY - 1 ) SELECT - anything_make AS make, - anything_model AS model, - n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales -FROM _t0 + ANY_VALUE(cars.make) AS make, + ANY_VALUE(cars.model) AS model, + COUNT(_s1.car_id) AS num_sales +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + LOWER(cars.vin_number) LIKE '%m5%' +GROUP BY + cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql 
index d43b5dd36..64026756e 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql @@ -2,22 +2,15 @@ WITH _s1 AS ( SELECT car_id FROM main.sales -), _t0 AS ( - SELECT - _s1.car_id, - MAX(cars.make) AS anything_make, - MAX(cars.model) AS anything_model, - COUNT(*) AS n_rows - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - WHERE - LOWER(cars.vin_number) LIKE '%m5%' - GROUP BY - 1 ) SELECT - anything_make AS make, - anything_model AS model, - n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales -FROM _t0 + MAX(cars.make) AS make, + MAX(cars.model) AS model, + COUNT(_s1.car_id) AS num_sales +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + LOWER(cars.vin_number) LIKE '%m5%' +GROUP BY + cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql index 1246f42ad..a88aaf0c0 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql @@ -2,22 +2,15 @@ WITH _s1 AS ( SELECT car_id FROM main.sales -), _t0 AS ( - SELECT - _s1.car_id, - ANY_VALUE(cars.make) AS anything_make, - ANY_VALUE(cars.model) AS anything_model, - COUNT(*) AS n_rows - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - WHERE - CONTAINS(LOWER(cars.vin_number), 'm5') - GROUP BY - 1 ) SELECT - anything_make AS make, - anything_model AS model, - n_rows * IFF(NOT car_id IS NULL, 1, 0) AS num_sales -FROM _t0 + ANY_VALUE(cars.make) AS make, + ANY_VALUE(cars.model) AS model, + COUNT(_s1.car_id) AS num_sales +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + CONTAINS(LOWER(cars.vin_number), 'm5') +GROUP BY + cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql index 
59fdcf024..64026756e 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql @@ -2,22 +2,15 @@ WITH _s1 AS ( SELECT car_id FROM main.sales -), _t0 AS ( - SELECT - _s1.car_id, - MAX(cars.make) AS anything_make, - MAX(cars.model) AS anything_model, - COUNT(*) AS n_rows - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - WHERE - LOWER(cars.vin_number) LIKE '%m5%' - GROUP BY - 1 ) SELECT - anything_make AS make, - anything_model AS model, - n_rows * IIF(NOT car_id IS NULL, 1, 0) AS num_sales -FROM _t0 + MAX(cars.make) AS make, + MAX(cars.model) AS model, + COUNT(_s1.car_id) AS num_sales +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + LOWER(cars.vin_number) LIKE '%m5%' +GROUP BY + cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql index b4fdda127..1d0e2975f 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql @@ -5,26 +5,14 @@ WITH _s1 AS ( FROM main.sales WHERE sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), 30, DAY) -), _t0 AS ( - SELECT - _s1.car_id, - COUNT(*) AS n_rows, - SUM(_s1.sale_price) AS sum_sale_price - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - WHERE - LOWER(cars.make) LIKE '%toyota%' - GROUP BY - 1 ) SELECT - n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales, - CASE - WHEN ( - n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END - ) > 0 - THEN COALESCE(sum_sale_price, 0) - ELSE NULL - END AS total_revenue -FROM _t0 + COUNT(_s1.car_id) AS num_sales, + CASE WHEN COUNT(_s1.car_id) > 0 THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL END AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + LOWER(cars.make) LIKE '%toyota%' +GROUP BY + cars._id diff --git 
a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql index d1690b727..8cf17c3b8 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql @@ -5,26 +5,14 @@ WITH _s1 AS ( FROM main.sales WHERE sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '30' DAY) -), _t0 AS ( - SELECT - _s1.car_id, - COUNT(*) AS n_rows, - SUM(_s1.sale_price) AS sum_sale_price - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - WHERE - LOWER(cars.make) LIKE '%toyota%' - GROUP BY - 1 ) SELECT - n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales, - CASE - WHEN ( - n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END - ) > 0 - THEN COALESCE(sum_sale_price, 0) - ELSE NULL - END AS total_revenue -FROM _t0 + COUNT(_s1.car_id) AS num_sales, + CASE WHEN COUNT(_s1.car_id) > 0 THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL END AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + LOWER(cars.make) LIKE '%toyota%' +GROUP BY + cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql index 1c1767fa3..ef1baa533 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql @@ -5,26 +5,14 @@ WITH _s1 AS ( FROM main.sales WHERE sale_date >= CURRENT_TIMESTAMP - INTERVAL '30 DAY' -), _t0 AS ( - SELECT - _s1.car_id, - COUNT(*) AS n_rows, - SUM(_s1.sale_price) AS sum_sale_price - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - WHERE - LOWER(cars.make) LIKE '%toyota%' - GROUP BY - 1 ) SELECT - n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END AS num_sales, - CASE - WHEN ( - n_rows * CASE WHEN NOT car_id IS NULL THEN 1 ELSE 0 END - ) > 0 - THEN COALESCE(sum_sale_price, 0) - ELSE NULL - END AS total_revenue 
-FROM _t0 + COUNT(_s1.car_id) AS num_sales, + CASE WHEN COUNT(_s1.car_id) > 0 THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL END AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + LOWER(cars.make) LIKE '%toyota%' +GROUP BY + cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql index 792fa3181..e0fbaeb7a 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql @@ -5,26 +5,14 @@ WITH _s1 AS ( FROM main.sales WHERE sale_date >= DATEADD(DAY, -30, CURRENT_TIMESTAMP()) -), _t0 AS ( - SELECT - _s1.car_id, - COUNT(*) AS n_rows, - SUM(_s1.sale_price) AS sum_sale_price - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - WHERE - CONTAINS(LOWER(cars.make), 'toyota') - GROUP BY - 1 ) SELECT - n_rows * IFF(NOT car_id IS NULL, 1, 0) AS num_sales, - CASE - WHEN ( - n_rows * IFF(NOT car_id IS NULL, 1, 0) - ) > 0 - THEN COALESCE(sum_sale_price, 0) - ELSE NULL - END AS total_revenue -FROM _t0 + COUNT(_s1.car_id) AS num_sales, + CASE WHEN COUNT(_s1.car_id) > 0 THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL END AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + CONTAINS(LOWER(cars.make), 'toyota') +GROUP BY + cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql index 47d615e83..f8fd3d291 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql @@ -5,26 +5,14 @@ WITH _s1 AS ( FROM main.sales WHERE sale_date >= DATETIME('now', '-30 day') -), _t0 AS ( - SELECT - _s1.car_id, - COUNT(*) AS n_rows, - SUM(_s1.sale_price) AS sum_sale_price - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - WHERE - LOWER(cars.make) LIKE 
'%toyota%' - GROUP BY - 1 ) SELECT - n_rows * IIF(NOT car_id IS NULL, 1, 0) AS num_sales, - CASE - WHEN ( - n_rows * IIF(NOT car_id IS NULL, 1, 0) - ) > 0 - THEN COALESCE(sum_sale_price, 0) - ELSE NULL - END AS total_revenue -FROM _t0 + COUNT(_s1.car_id) AS num_sales, + CASE WHEN COUNT(_s1.car_id) > 0 THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL END AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + LOWER(cars.make) LIKE '%toyota%' +GROUP BY + cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql index 1f5c1833f..7520ba4e5 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql @@ -21,6 +21,6 @@ JOIN _t2 AS _t2 LEFT JOIN _s3 AS _s3 ON _s3.car_id = cars._id GROUP BY - _s3.car_id + cars._id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql index 8e9481527..4aa84c3d5 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql @@ -20,6 +20,6 @@ JOIN _t AS _t LEFT JOIN _s3 AS _s3 ON _s3.car_id = cars._id GROUP BY - _s3.car_id + cars._id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql index 1b75d1711..7ade64db5 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql @@ -20,6 +20,6 @@ JOIN _t AS _t LEFT JOIN _s3 AS _s3 ON _s3.car_id = cars._id GROUP BY - _s3.car_id + cars._id ORDER BY 3 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql index 4dfa78a81..f23654281 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql +++ 
b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql @@ -21,6 +21,6 @@ JOIN _t2 AS _t2 LEFT JOIN _s3 AS _s3 ON _s3.car_id = cars._id GROUP BY - _s3.car_id + cars._id ORDER BY 3 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql index 78e7d7c01..6c91d9155 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql @@ -20,6 +20,6 @@ JOIN _t AS _t LEFT JOIN _s3 AS _s3 ON _s3.car_id = cars._id GROUP BY - _s3.car_id + cars._id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql index 9b15444aa..583b1362e 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql @@ -12,7 +12,7 @@ WITH _s3 AS ( LEFT JOIN _s3 AS _s3 ON _s3.patient_id = patients.patient_id GROUP BY - _s3.patient_id + patients.patient_id ), _t0 AS ( SELECT min_year_start_dt, diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql index da0175bb0..bb262687c 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql @@ -20,7 +20,7 @@ WITH _u_0 AS ( WHERE NOT _u_0._u_1 IS NULL GROUP BY - _s3.patient_id + patients.patient_id ), _t0 AS ( SELECT min_year_start_dt, diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql index dab36dc15..3658de2f2 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql @@ -20,7 +20,7 @@ WITH _u_0 AS ( WHERE NOT _u_0._u_1 IS NULL GROUP BY - _s3.patient_id + patients.patient_id ), _t0 AS ( SELECT min_year_start_dt, diff --git 
a/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql index 246a11650..00510540c 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql @@ -20,7 +20,7 @@ WITH _u_0 AS ( WHERE NOT _u_0._u_1 IS NULL GROUP BY - _s3.patient_id + patients.patient_id ), _t0 AS ( SELECT min_year_start_dt, diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql index 8ee828136..fe455efe8 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql @@ -20,7 +20,7 @@ WITH _u_0 AS ( WHERE NOT _u_0._u_1 IS NULL GROUP BY - _s3.patient_id + patients.patient_id ), _t0 AS ( SELECT min_year_start_dt, diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql index f7b7df7b3..36a527030 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql @@ -5,7 +5,7 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily ) SELECT - _s1.coupon_id, + coupons.cid AS coupon_id, COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql index f7b7df7b3..36a527030 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql @@ -5,7 +5,7 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily ) SELECT - _s1.coupon_id, + coupons.cid AS coupon_id, COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql index 
f7b7df7b3..36a527030 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql @@ -5,7 +5,7 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily ) SELECT - _s1.coupon_id, + coupons.cid AS coupon_id, COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql index f7b7df7b3..36a527030 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql @@ -5,7 +5,7 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily ) SELECT - _s1.coupon_id, + coupons.cid AS coupon_id, COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql index f7b7df7b3..36a527030 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql @@ -5,7 +5,7 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily ) SELECT - _s1.coupon_id, + coupons.cid AS coupon_id, COALESCE(SUM(_s1.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 98f352c3f..9da610e5d 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -6,23 +6,16 @@ WITH _s1 AS ( WHERE created_at >= DATE_TRUNC('DAY', DATE_SUB(CURRENT_TIMESTAMP(), 150, DAY)) AND receiver_type = 1 -), _t0 AS ( - SELECT - _s1.receiver_id, - ANY_VALUE(merchants.name) AS anything_name, - COUNT(*) AS n_rows, - SUM(_s1.amount) AS sum_amount - FROM main.merchants AS merchants - LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id 
= merchants.mid - GROUP BY - 1 ) SELECT - anything_name AS merchant_name, - n_rows * CASE WHEN NOT receiver_id IS NULL THEN 1 ELSE 0 END AS total_transactions, - COALESCE(sum_amount, 0) AS total_amount -FROM _t0 + ANY_VALUE(merchants.name) AS merchant_name, + COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid +GROUP BY + merchants.mid ORDER BY 3 DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql index 9f2637179..f7f6babf0 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql @@ -6,23 +6,16 @@ WITH _s1 AS ( WHERE created_at >= CAST(DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '150' DAY) AS DATE) AND receiver_type = 1 -), _t0 AS ( - SELECT - _s1.receiver_id, - ANY_VALUE(merchants.name) AS anything_name, - COUNT(*) AS n_rows, - SUM(_s1.amount) AS sum_amount - FROM main.merchants AS merchants - LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid - GROUP BY - 1 ) SELECT - anything_name AS merchant_name, - n_rows * CASE WHEN NOT receiver_id IS NULL THEN 1 ELSE 0 END AS total_transactions, - COALESCE(sum_amount, 0) AS total_amount -FROM _t0 + ANY_VALUE(merchants.name) AS merchant_name, + COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid +GROUP BY + merchants.mid ORDER BY 3 DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql index 1aa23732c..1c7b93bbe 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql @@ -6,23 +6,16 @@ WITH _s1 AS ( WHERE created_at >= DATE_TRUNC('DAY', 
CURRENT_TIMESTAMP - INTERVAL '150 DAY') AND receiver_type = 1 -), _t0 AS ( - SELECT - _s1.receiver_id, - MAX(merchants.name) AS anything_name, - COUNT(*) AS n_rows, - SUM(_s1.amount) AS sum_amount - FROM main.merchants AS merchants - LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid - GROUP BY - 1 ) SELECT - anything_name AS merchant_name, - n_rows * CASE WHEN NOT receiver_id IS NULL THEN 1 ELSE 0 END AS total_transactions, - COALESCE(sum_amount, 0) AS total_amount -FROM _t0 + MAX(merchants.name) AS merchant_name, + COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid +GROUP BY + merchants.mid ORDER BY 3 DESC NULLS LAST LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql index 15a78ae5e..3618ff61c 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql @@ -6,23 +6,16 @@ WITH _s1 AS ( WHERE created_at >= DATE_TRUNC('DAY', DATEADD(DAY, -150, CURRENT_TIMESTAMP())) AND receiver_type = 1 -), _t0 AS ( - SELECT - _s1.receiver_id, - ANY_VALUE(merchants.name) AS anything_name, - COUNT(*) AS n_rows, - SUM(_s1.amount) AS sum_amount - FROM main.merchants AS merchants - LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid - GROUP BY - 1 ) SELECT - anything_name AS merchant_name, - n_rows * IFF(NOT receiver_id IS NULL, 1, 0) AS total_transactions, - COALESCE(sum_amount, 0) AS total_amount -FROM _t0 + ANY_VALUE(merchants.name) AS merchant_name, + COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid +GROUP BY + merchants.mid ORDER BY 3 DESC NULLS LAST LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql 
b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index 22e5635b6..43b6b1cf6 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -6,23 +6,16 @@ WITH _s1 AS ( WHERE created_at >= DATE(DATETIME('now', '-150 day'), 'start of day') AND receiver_type = 1 -), _t0 AS ( - SELECT - _s1.receiver_id, - MAX(merchants.name) AS anything_name, - COUNT(*) AS n_rows, - SUM(_s1.amount) AS sum_amount - FROM main.merchants AS merchants - LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid - GROUP BY - 1 ) SELECT - anything_name AS merchant_name, - n_rows * IIF(NOT receiver_id IS NULL, 1, 0) AS total_transactions, - COALESCE(sum_amount, 0) AS total_amount -FROM _t0 + MAX(merchants.name) AS merchant_name, + COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid +GROUP BY + merchants.mid ORDER BY 3 DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index 9f466815c..ef2037e72 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -13,7 +13,7 @@ FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid GROUP BY - _s1.coupon_id + coupons.cid ORDER BY 2 DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql index 9f466815c..ef2037e72 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql @@ -13,7 +13,7 @@ FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid GROUP BY - _s1.coupon_id + coupons.cid ORDER BY 2 DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql 
b/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql index f8fd39f72..6995be3d8 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql @@ -13,7 +13,7 @@ FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid GROUP BY - _s1.coupon_id + coupons.cid ORDER BY 2 DESC NULLS LAST LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql index 7004136d7..e5af28e0c 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql @@ -13,7 +13,7 @@ FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid GROUP BY - _s1.coupon_id + coupons.cid ORDER BY 2 DESC NULLS LAST LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index 12fb85faf..ed99eb3af 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -13,7 +13,7 @@ FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid GROUP BY - _s1.coupon_id + coupons.cid ORDER BY 2 DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql b/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql index a2979cbc9..1e7e1f73f 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql @@ -18,8 +18,8 @@ WITH _t1 AS ( FROM main.coupons ), _s4 AS ( SELECT - _s3.merchant_id, - _s3.start_date, + merchants.mid, + _s1.min_start_date, ANY_VALUE(merchants.created_at) AS anything_created_at, MAX(_s3.cid) AS max_cid FROM main.merchants AS merchants @@ -32,11 +32,11 @@ WITH _t1 AS ( 2 ) SELECT - _s4.merchant_id AS merchants_id, + _s4.mid AS merchants_id, _s4.anything_created_at AS merchant_registration_date, - 
_s4.start_date AS earliest_coupon_start_date, + _s4.min_start_date AS earliest_coupon_start_date, _s4.max_cid AS earliest_coupon_id FROM _s4 AS _s4 JOIN _t1 AS _s5 - ON _s4.merchant_id = _s5.merchant_id + ON _s4.mid = _s5.merchant_id AND _s5.start_date <= DATE_ADD(CAST(_s4.anything_created_at AS TIMESTAMP), 1, 'YEAR') diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql b/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql index fa0f503e2..630ebae9a 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql @@ -18,8 +18,8 @@ WITH _t1 AS ( FROM main.coupons ), _s4 AS ( SELECT - _s3.merchant_id, - _s3.start_date, + merchants.mid, + _s1.min_start_date, ANY_VALUE(merchants.created_at) AS anything_created_at, MAX(_s3.cid) AS max_cid FROM main.merchants AS merchants @@ -32,11 +32,11 @@ WITH _t1 AS ( 2 ) SELECT - _s4.merchant_id AS merchants_id, + _s4.mid AS merchants_id, _s4.anything_created_at AS merchant_registration_date, - _s4.start_date AS earliest_coupon_start_date, + _s4.min_start_date AS earliest_coupon_start_date, _s4.max_cid AS earliest_coupon_id FROM _s4 AS _s4 JOIN _t1 AS _s5 - ON _s4.merchant_id = _s5.merchant_id + ON _s4.mid = _s5.merchant_id AND _s5.start_date <= DATE_ADD(CAST(_s4.anything_created_at AS DATETIME), INTERVAL '1' YEAR) diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql b/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql index 5004a06ec..170ef19bd 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql @@ -18,8 +18,8 @@ WITH _t1 AS ( FROM main.coupons ), _s4 AS ( SELECT - _s3.merchant_id, - _s3.start_date, + merchants.mid, + _s1.min_start_date, MAX(merchants.created_at) AS anything_created_at, MAX(_s3.cid) AS max_cid FROM main.merchants AS merchants @@ -32,11 +32,11 @@ WITH _t1 AS ( 2 ) SELECT - _s4.merchant_id AS merchants_id, + _s4.mid AS merchants_id, 
_s4.anything_created_at AS merchant_registration_date, - _s4.start_date AS earliest_coupon_start_date, + _s4.min_start_date AS earliest_coupon_start_date, _s4.max_cid AS earliest_coupon_id FROM _s4 AS _s4 JOIN _t1 AS _s5 - ON _s4.merchant_id = _s5.merchant_id + ON _s4.mid = _s5.merchant_id AND _s5.start_date <= CAST(_s4.anything_created_at AS TIMESTAMP) + INTERVAL '1 YEAR' diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql index 7ebb2f8df..18bae48fc 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql @@ -18,8 +18,8 @@ WITH _t1 AS ( FROM main.coupons ), _s4 AS ( SELECT - _s3.merchant_id, - _s3.start_date, + merchants.mid, + _s1.min_start_date, ANY_VALUE(merchants.created_at) AS anything_created_at, MAX(_s3.cid) AS max_cid FROM main.merchants AS merchants @@ -32,11 +32,11 @@ WITH _t1 AS ( 2 ) SELECT - _s4.merchant_id AS merchants_id, + _s4.mid AS merchants_id, _s4.anything_created_at AS merchant_registration_date, - _s4.start_date AS earliest_coupon_start_date, + _s4.min_start_date AS earliest_coupon_start_date, _s4.max_cid AS earliest_coupon_id FROM _s4 AS _s4 JOIN _t1 AS _s5 - ON _s4.merchant_id = _s5.merchant_id + ON _s4.mid = _s5.merchant_id AND _s5.start_date <= DATEADD(YEAR, 1, CAST(_s4.anything_created_at AS TIMESTAMP)) diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql index d7ad86900..30b148903 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql @@ -18,8 +18,8 @@ WITH _t1 AS ( FROM main.coupons ), _s4 AS ( SELECT - _s3.merchant_id, - _s3.start_date, + merchants.mid, + _s1.min_start_date, MAX(merchants.created_at) AS anything_created_at, MAX(_s3.cid) AS max_cid FROM main.merchants AS merchants @@ -32,11 +32,11 @@ WITH _t1 AS ( 2 ) SELECT - _s4.merchant_id AS 
merchants_id, + _s4.mid AS merchants_id, _s4.anything_created_at AS merchant_registration_date, - _s4.start_date AS earliest_coupon_start_date, + _s4.min_start_date AS earliest_coupon_start_date, _s4.max_cid AS earliest_coupon_id FROM _s4 AS _s4 JOIN _t1 AS _s5 - ON _s4.merchant_id = _s5.merchant_id + ON _s4.mid = _s5.merchant_id AND _s5.start_date <= DATETIME(_s4.anything_created_at, '1 year') diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql index a15437263..03c0e34d0 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql @@ -30,9 +30,8 @@ WITH _s0 AS ( ) ), _t1 AS ( SELECT - _s9.s_name, - _s9.search_id, - COUNT(*) AS n_rows + _s0.s_name, + COUNT(_s9.search_id) AS count_search_id FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(searches.search_ts AS DATETIME)) @@ -41,15 +40,13 @@ WITH _s0 AS ( LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id GROUP BY - 1, - 2 + searches.search_id, + 1 ), _s16 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(( - n_rows * CASE WHEN NOT search_id IS NULL THEN 1 ELSE 0 END - ) > 0) AS sum_is_intra_season + SUM(count_search_id > 0) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql index 21b79af08..477cbe21b 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql @@ -30,9 +30,8 @@ WITH _s0 AS ( ) ), _t1 AS ( SELECT - _s9.s_name, - _s9.search_id, - COUNT(*) AS n_rows + _s0.s_name, + COUNT(_s9.search_id) AS count_search_id FROM _s0 AS _s0 JOIN SEARCHES AS SEARCHES ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(SEARCHES.search_ts AS DATETIME)) @@ -41,15 +40,13 @@ WITH _s0 AS ( LEFT JOIN _s9 AS _s9 
ON SEARCHES.search_id = _s9.search_id AND _s0.s_name = _s9.s_name GROUP BY - 1, - 2 + SEARCHES.search_id, + 1 ), _s16 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(( - n_rows * CASE WHEN NOT search_id IS NULL THEN 1 ELSE 0 END - ) > 0) AS sum_is_intra_season + SUM(count_search_id > 0) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql index 2dd1ee819..b8fbbb36d 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql @@ -30,9 +30,8 @@ WITH _s0 AS ( ) ), _t1 AS ( SELECT - _s9.s_name, - _s9.search_id, - COUNT(*) AS n_rows + _s0.s_name, + COUNT(_s9.search_id) AS count_search_id FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(searches.search_ts AS TIMESTAMP)) @@ -41,21 +40,13 @@ WITH _s0 AS ( LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id GROUP BY - 1, - 2 + searches.search_id, + 1 ), _s16 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM( - CASE - WHEN ( - n_rows * CASE WHEN NOT search_id IS NULL THEN 1 ELSE 0 END - ) > 0 - THEN 1 - ELSE 0 - END - ) AS sum_is_intra_season + SUM(CASE WHEN count_search_id > 0 THEN 1 ELSE 0 END) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql index 56452f25d..d43061ea8 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql @@ -30,9 +30,8 @@ WITH _s0 AS ( ) ), _t1 AS ( SELECT - _s9.s_name, - _s9.search_id, - COUNT(*) AS n_rows + _s0.s_name, + COUNT(_s9.search_id) AS count_search_id FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) @@ -41,15 +40,13 @@ WITH _s0 AS ( 
LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id GROUP BY - 1, - 2 + searches.search_id, + 1 ), _s16 AS ( SELECT s_name, COUNT(*) AS n_rows, - COUNT_IF(( - n_rows * IFF(NOT search_id IS NULL, 1, 0) - ) > 0) AS sum_is_intra_season + COUNT_IF(count_search_id > 0) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql index ac6901882..6d6e813ed 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql @@ -32,9 +32,8 @@ WITH _s0 AS ( ) ), _t1 AS ( SELECT - _s9.s_name, - _s9.search_id, - COUNT(*) AS n_rows + _s0.s_name, + COUNT(_s9.search_id) AS count_search_id FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = CAST(STRFTIME('%m', searches.search_ts) AS INTEGER) @@ -43,15 +42,13 @@ WITH _s0 AS ( LEFT JOIN _s9 AS _s9 ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id GROUP BY - 1, - 2 + searches.search_id, + 1 ), _s16 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(( - n_rows * IIF(NOT search_id IS NULL, 1, 0) - ) > 0) AS sum_is_intra_season + SUM(count_search_id > 0) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/quantile_test_2_ansi.sql b/tests/test_sql_refsols/quantile_test_2_ansi.sql index 3bd64c4cb..30c89063e 100644 --- a/tests/test_sql_refsols/quantile_test_2_ansi.sql +++ b/tests/test_sql_refsols/quantile_test_2_ansi.sql @@ -43,6 +43,6 @@ JOIN tpch.region AS region LEFT JOIN _s5 AS _s5 ON _s0.n_nationkey = _s5.c_nationkey GROUP BY - _s5.c_nationkey + _s0.n_nationkey ORDER BY 2 diff --git a/tests/test_sql_refsols/quantile_test_2_mysql.sql b/tests/test_sql_refsols/quantile_test_2_mysql.sql index d55c23d33..fed259c26 100644 --- a/tests/test_sql_refsols/quantile_test_2_mysql.sql +++ b/tests/test_sql_refsols/quantile_test_2_mysql.sql @@ -17,8 +17,8 @@ WITH _s0 AS ( AND 
EXTRACT(YEAR FROM CAST(ORDERS.o_orderdate AS DATETIME)) = 1998 ), _t1 AS ( SELECT - _s5.c_nationkey, _s0.n_name, + _s0.n_nationkey, _s5.o_totalprice, REGION.r_name, CASE @@ -102,6 +102,6 @@ SELECT MAX(o_totalprice) AS orders_max FROM _t1 GROUP BY - c_nationkey + n_nationkey ORDER BY 2 diff --git a/tests/test_sql_refsols/quantile_test_2_postgres.sql b/tests/test_sql_refsols/quantile_test_2_postgres.sql index 0e7d236ee..3ed701633 100644 --- a/tests/test_sql_refsols/quantile_test_2_postgres.sql +++ b/tests/test_sql_refsols/quantile_test_2_postgres.sql @@ -43,6 +43,6 @@ JOIN tpch.region AS region LEFT JOIN _s5 AS _s5 ON _s0.n_nationkey = _s5.c_nationkey GROUP BY - _s5.c_nationkey + _s0.n_nationkey ORDER BY 2 NULLS FIRST diff --git a/tests/test_sql_refsols/quantile_test_2_snowflake.sql b/tests/test_sql_refsols/quantile_test_2_snowflake.sql index 022e21507..dc89969a2 100644 --- a/tests/test_sql_refsols/quantile_test_2_snowflake.sql +++ b/tests/test_sql_refsols/quantile_test_2_snowflake.sql @@ -43,6 +43,6 @@ JOIN tpch.region AS region LEFT JOIN _s5 AS _s5 ON _s0.n_nationkey = _s5.c_nationkey GROUP BY - _s5.c_nationkey + _s0.n_nationkey ORDER BY 2 NULLS FIRST diff --git a/tests/test_sql_refsols/quantile_test_2_sqlite.sql b/tests/test_sql_refsols/quantile_test_2_sqlite.sql index ca3c12a6e..ed39f3e35 100644 --- a/tests/test_sql_refsols/quantile_test_2_sqlite.sql +++ b/tests/test_sql_refsols/quantile_test_2_sqlite.sql @@ -17,8 +17,8 @@ WITH _s0 AS ( AND customer.c_custkey = orders.o_custkey ), _t1 AS ( SELECT - _s5.c_nationkey, _s0.n_name, + _s0.n_nationkey, _s5.o_totalprice, region.r_name, CASE @@ -81,6 +81,6 @@ SELECT MAX(o_totalprice) AS orders_max FROM _t1 GROUP BY - c_nationkey + n_nationkey ORDER BY 2 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql index 8783042be..b995e209d 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql 
+++ b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql @@ -13,10 +13,9 @@ WITH _s0 AS ( FROM main.incidents ), _t1 AS ( SELECT - _s7.in_device_id, ANY_VALUE(_s3.co_id) AS anything__id_3, ANY_VALUE(_s2.co_id) AS anything_co_id, - COUNT(*) AS n_rows + COUNT(_s7.in_device_id) AS count_in_device_id FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices @@ -25,13 +24,13 @@ WITH _s0 AS ( LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s9 AS ( SELECT anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t1 GROUP BY 1, @@ -40,7 +39,7 @@ WITH _s0 AS ( SELECT _s0.co_name AS factory_country, _s1.co_name AS purchase_country, - ROUND(COALESCE(_s9.sum_n_rows, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s9.sum_count_in_device_id, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql index 43b4ef778..fe6e8a558 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql @@ -13,10 +13,9 @@ WITH _s0 AS ( FROM main.INCIDENTS ), _t1 AS ( SELECT - _s7.in_device_id, ANY_VALUE(_s3.co_id) AS anything__id_3, ANY_VALUE(_s2.co_id) AS anything_co_id, - COUNT(*) AS n_rows + COUNT(_s7.in_device_id) AS count_in_device_id FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.DEVICES AS DEVICES @@ -25,13 +24,13 @@ WITH _s0 AS ( LEFT JOIN _s7 AS _s7 ON DEVICES.de_id = _s7.in_device_id GROUP BY - 1 + DEVICES.de_id ), _s9 AS ( SELECT anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows + 
SUM(count_in_device_id) AS sum_count_in_device_id FROM _t1 GROUP BY 1, @@ -40,7 +39,7 @@ WITH _s0 AS ( SELECT _s0.co_name AS factory_country, _s1.co_name AS purchase_country, - ROUND(COALESCE(_s9.sum_n_rows, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s9.sum_count_in_device_id, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql index 1b932331b..71a0fc511 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql @@ -13,10 +13,9 @@ WITH _s0 AS ( FROM main.incidents ), _t1 AS ( SELECT - _s7.in_device_id, MAX(_s3.co_id) AS anything__id_3, MAX(_s2.co_id) AS anything_co_id, - COUNT(*) AS n_rows + COUNT(_s7.in_device_id) AS count_in_device_id FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices @@ -25,13 +24,13 @@ WITH _s0 AS ( LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s9 AS ( SELECT anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t1 GROUP BY 1, @@ -41,7 +40,7 @@ SELECT _s0.co_name AS factory_country, _s1.co_name AS purchase_country, ROUND( - CAST(CAST(COALESCE(_s9.sum_n_rows, 0) AS DOUBLE PRECISION) / COALESCE(_s9.n_rows, 0) AS DECIMAL), + CAST(CAST(COALESCE(_s9.sum_count_in_device_id, 0) AS DOUBLE PRECISION) / COALESCE(_s9.n_rows, 0) AS DECIMAL), 2 ) AS ir FROM _s0 AS _s0 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql index 15dcb5047..a39d4b1e1 100644 --- 
a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql @@ -13,10 +13,9 @@ WITH _s0 AS ( FROM main.incidents ), _t1 AS ( SELECT - _s7.in_device_id, ANY_VALUE(_s3.co_id) AS anything__id_3, ANY_VALUE(_s2.co_id) AS anything_co_id, - COUNT(*) AS n_rows + COUNT(_s7.in_device_id) AS count_in_device_id FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices @@ -25,13 +24,13 @@ WITH _s0 AS ( LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s9 AS ( SELECT anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(n_rows * IFF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t1 GROUP BY 1, @@ -40,7 +39,7 @@ WITH _s0 AS ( SELECT _s0.co_name AS factory_country, _s1.co_name AS purchase_country, - ROUND(COALESCE(_s9.sum_n_rows, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s9.sum_count_in_device_id, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql index dbcaf6a68..ff19c8841 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql @@ -13,10 +13,9 @@ WITH _s0 AS ( FROM main.incidents ), _t1 AS ( SELECT - _s7.in_device_id, MAX(_s3.co_id) AS anything__id_3, MAX(_s2.co_id) AS anything_co_id, - COUNT(*) AS n_rows + COUNT(_s7.in_device_id) AS count_in_device_id FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices @@ -25,13 +24,13 @@ WITH _s0 AS ( LEFT JOIN _s7 AS _s7 ON _s7.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s9 AS ( SELECT anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(n_rows * IIF(NOT 
in_device_id IS NULL, 1, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t1 GROUP BY 1, @@ -40,7 +39,7 @@ WITH _s0 AS ( SELECT _s0.co_name AS factory_country, _s1.co_name AS purchase_country, - ROUND(CAST(COALESCE(_s9.sum_n_rows, 0) AS REAL) / COALESCE(_s9.n_rows, 0), 2) AS ir + ROUND(CAST(COALESCE(_s9.sum_count_in_device_id, 0) AS REAL) / COALESCE(_s9.n_rows, 0), 2) AS ir FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql index d54610223..743c3d91c 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql @@ -38,21 +38,20 @@ WITH _t2 AS ( 1 ), _t5 AS ( SELECT - _s11.in_device_id, ANY_VALUE(users.us_country_id) AS anything_us_country_id, - COUNT(*) AS n_rows + COUNT(_s11.in_device_id) AS count_in_device_id FROM main.users AS users JOIN main.devices AS devices ON devices.de_owner_id = users.us_id LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s13 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t5 GROUP BY 1 @@ -61,7 +60,7 @@ SELECT countries.co_name AS country_name, ROUND(COALESCE(_s3.sum_n_rows, 0) / _s3.n_rows, 2) AS made_ir, ROUND(COALESCE(_s7.sum_n_rows, 0) / _s7.n_rows, 2) AS sold_ir, - ROUND(COALESCE(_s13.sum_n_rows, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir + ROUND(COALESCE(_s13.sum_count_in_device_id, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir FROM main.countries AS countries JOIN _s3 AS _s3 ON _s3.de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql 
b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql index e192773d4..966a4ac61 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql @@ -38,21 +38,20 @@ WITH _t2 AS ( 1 ), _t5 AS ( SELECT - _s11.in_device_id, ANY_VALUE(USERS.us_country_id) AS anything_us_country_id, - COUNT(*) AS n_rows + COUNT(_s11.in_device_id) AS count_in_device_id FROM main.USERS AS USERS JOIN main.DEVICES AS DEVICES ON DEVICES.de_owner_id = USERS.us_id LEFT JOIN _t2 AS _s11 ON DEVICES.de_id = _s11.in_device_id GROUP BY - 1 + DEVICES.de_id ), _s13 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t5 GROUP BY 1 @@ -61,7 +60,7 @@ SELECT COUNTRIES.co_name COLLATE utf8mb4_bin AS country_name, ROUND(COALESCE(_s3.sum_n_rows, 0) / _s3.n_rows, 2) AS made_ir, ROUND(COALESCE(_s7.sum_n_rows, 0) / _s7.n_rows, 2) AS sold_ir, - ROUND(COALESCE(_s13.sum_n_rows, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir + ROUND(COALESCE(_s13.sum_count_in_device_id, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir FROM main.COUNTRIES AS COUNTRIES JOIN _s3 AS _s3 ON COUNTRIES.co_id = _s3.de_production_country_id diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql index f415d53cf..ecffcf868 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql @@ -38,21 +38,20 @@ WITH _t2 AS ( 1 ), _t5 AS ( SELECT - _s11.in_device_id, MAX(users.us_country_id) AS anything_us_country_id, - COUNT(*) AS n_rows + COUNT(_s11.in_device_id) AS count_in_device_id FROM main.users AS users JOIN main.devices AS devices ON 
devices.de_owner_id = users.us_id LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s13 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_rows + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t5 GROUP BY 1 @@ -68,7 +67,7 @@ SELECT 2 ) AS sold_ir, ROUND( - CAST(CAST(COALESCE(_s13.sum_n_rows, 0) AS DOUBLE PRECISION) / COALESCE(_s13.n_rows, 0) AS DECIMAL), + CAST(CAST(COALESCE(_s13.sum_count_in_device_id, 0) AS DOUBLE PRECISION) / COALESCE(_s13.n_rows, 0) AS DECIMAL), 2 ) AS user_ir FROM main.countries AS countries diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql index 4703c4aaf..0ae77254f 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql @@ -38,21 +38,20 @@ WITH _t2 AS ( 1 ), _t5 AS ( SELECT - _s11.in_device_id, ANY_VALUE(users.us_country_id) AS anything_us_country_id, - COUNT(*) AS n_rows + COUNT(_s11.in_device_id) AS count_in_device_id FROM main.users AS users JOIN main.devices AS devices ON devices.de_owner_id = users.us_id LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s13 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(n_rows * IFF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t5 GROUP BY 1 @@ -61,7 +60,7 @@ SELECT countries.co_name AS country_name, ROUND(COALESCE(_s3.sum_n_rows, 0) / _s3.n_rows, 2) AS made_ir, ROUND(COALESCE(_s7.sum_n_rows, 0) / _s7.n_rows, 2) AS sold_ir, - ROUND(COALESCE(_s13.sum_n_rows, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir + ROUND(COALESCE(_s13.sum_count_in_device_id, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir FROM main.countries AS 
countries JOIN _s3 AS _s3 ON _s3.de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql index 747b8a865..2f2e2bc44 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql @@ -38,21 +38,20 @@ WITH _t2 AS ( 1 ), _t5 AS ( SELECT - _s11.in_device_id, MAX(users.us_country_id) AS anything_us_country_id, - COUNT(*) AS n_rows + COUNT(_s11.in_device_id) AS count_in_device_id FROM main.users AS users JOIN main.devices AS devices ON devices.de_owner_id = users.us_id LEFT JOIN _t2 AS _s11 ON _s11.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s13 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(n_rows * IIF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t5 GROUP BY 1 @@ -61,7 +60,10 @@ SELECT countries.co_name AS country_name, ROUND(CAST(COALESCE(_s3.sum_n_rows, 0) AS REAL) / _s3.n_rows, 2) AS made_ir, ROUND(CAST(COALESCE(_s7.sum_n_rows, 0) AS REAL) / _s7.n_rows, 2) AS sold_ir, - ROUND(CAST(COALESCE(_s13.sum_n_rows, 0) AS REAL) / COALESCE(_s13.n_rows, 0), 2) AS user_ir + ROUND( + CAST(COALESCE(_s13.sum_count_in_device_id, 0) AS REAL) / COALESCE(_s13.n_rows, 0), + 2 + ) AS user_ir FROM main.countries AS countries JOIN _s3 AS _s3 ON _s3.de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql index ec68d668a..87fedf6a9 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql @@ -4,28 +4,27 @@ WITH _s3 AS ( FROM main.incidents ), _t1 AS ( 
SELECT - _s3.in_device_id, ANY_VALUE(devices.de_production_country_id) AS anything_de_production_country_id, - COUNT(*) AS n_rows + COUNT(_s3.in_device_id) AS count_in_device_id FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' LEFT JOIN _s3 AS _s3 ON _s3.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s5 AS ( SELECT anything_de_production_country_id, COUNT(*) AS n_rows, - SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_incidents + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t1 GROUP BY 1 ) SELECT countries.co_name AS country, - ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_count_in_device_id, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 ON _s5.anything_de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql index 2f71be8e8..718d1659f 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql @@ -4,28 +4,27 @@ WITH _s3 AS ( FROM main.INCIDENTS ), _t1 AS ( SELECT - _s3.in_device_id, ANY_VALUE(DEVICES.de_production_country_id) AS anything_de_production_country_id, - COUNT(*) AS n_rows + COUNT(_s3.in_device_id) AS count_in_device_id FROM main.DEVICES AS DEVICES JOIN main.PRODUCTS AS PRODUCTS ON DEVICES.de_product_id = PRODUCTS.pr_id AND PRODUCTS.pr_name = 'Sun-Set' LEFT JOIN _s3 AS _s3 ON DEVICES.de_id = _s3.in_device_id GROUP BY - 1 + DEVICES.de_id ), _s5 AS ( SELECT anything_de_production_country_id, COUNT(*) AS n_rows, - SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_incidents + SUM(count_in_device_id) AS 
sum_count_in_device_id FROM _t1 GROUP BY 1 ) SELECT COUNTRIES.co_name COLLATE utf8mb4_bin AS country, - ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_count_in_device_id, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.COUNTRIES AS COUNTRIES LEFT JOIN _s5 AS _s5 ON COUNTRIES.co_id = _s5.anything_de_production_country_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql index 9750d9a8b..e4f683ea6 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql @@ -4,21 +4,20 @@ WITH _s3 AS ( FROM main.incidents ), _t1 AS ( SELECT - _s3.in_device_id, MAX(devices.de_production_country_id) AS anything_de_production_country_id, - COUNT(*) AS n_rows + COUNT(_s3.in_device_id) AS count_in_device_id FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' LEFT JOIN _s3 AS _s3 ON _s3.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s5 AS ( SELECT anything_de_production_country_id, COUNT(*) AS n_rows, - SUM(n_rows * CASE WHEN NOT in_device_id IS NULL THEN 1 ELSE 0 END) AS sum_n_incidents + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t1 GROUP BY 1 @@ -26,7 +25,7 @@ WITH _s3 AS ( SELECT countries.co_name AS country, ROUND( - CAST(CAST(COALESCE(_s5.sum_n_incidents, 0) AS DOUBLE PRECISION) / COALESCE(_s5.n_rows, 0) AS DECIMAL), + CAST(CAST(COALESCE(_s5.sum_count_in_device_id, 0) AS DOUBLE PRECISION) / COALESCE(_s5.n_rows, 0) AS DECIMAL), 2 ) AS ir FROM main.countries AS countries diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql 
index 360f65fa6..ee93aa6dc 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql @@ -4,28 +4,27 @@ WITH _s3 AS ( FROM main.incidents ), _t1 AS ( SELECT - _s3.in_device_id, ANY_VALUE(devices.de_production_country_id) AS anything_de_production_country_id, - COUNT(*) AS n_rows + COUNT(_s3.in_device_id) AS count_in_device_id FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' LEFT JOIN _s3 AS _s3 ON _s3.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s5 AS ( SELECT anything_de_production_country_id, COUNT(*) AS n_rows, - SUM(n_rows * IFF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_incidents + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t1 GROUP BY 1 ) SELECT countries.co_name AS country, - ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_count_in_device_id, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 ON _s5.anything_de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql index b49ac1207..682b2dd3d 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql @@ -4,28 +4,27 @@ WITH _s3 AS ( FROM main.incidents ), _t1 AS ( SELECT - _s3.in_device_id, MAX(devices.de_production_country_id) AS anything_de_production_country_id, - COUNT(*) AS n_rows + COUNT(_s3.in_device_id) AS count_in_device_id FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' LEFT JOIN _s3 AS 
_s3 ON _s3.in_device_id = devices.de_id GROUP BY - 1 + devices.de_id ), _s5 AS ( SELECT anything_de_production_country_id, COUNT(*) AS n_rows, - SUM(n_rows * IIF(NOT in_device_id IS NULL, 1, 0)) AS sum_n_incidents + SUM(count_in_device_id) AS sum_count_in_device_id FROM _t1 GROUP BY 1 ) SELECT countries.co_name AS country, - ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(CAST(COALESCE(_s5.sum_count_in_device_id, 0) AS REAL) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 ON _s5.anything_de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index 38b0e3302..a53a1e9b5 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -46,23 +46,17 @@ WITH _t5 AS ( AND _s11.o_orderkey = _t3.o_orderkey WHERE _t3.anything_o_orderstatus = 'F' -), _t0 AS ( - SELECT - _s13.anything_l_suppkey, - ANY_VALUE(supplier.s_name) AS anything_s_name, - COUNT(*) AS n_rows - FROM tpch.supplier AS supplier - JOIN tpch.nation AS nation - ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey - LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey - GROUP BY - 1 ) SELECT - anything_s_name AS S_NAME, - n_rows * CASE WHEN NOT anything_l_suppkey IS NULL THEN 1 ELSE 0 END AS NUMWAIT -FROM _t0 + ANY_VALUE(supplier.s_name) AS S_NAME, + COUNT(_s13.anything_l_suppkey) AS NUMWAIT +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey +LEFT JOIN _s13 AS _s13 + ON _s13.anything_l_suppkey = supplier.s_suppkey +GROUP BY + supplier.s_suppkey ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/tpch_q21_mysql.sql b/tests/test_sql_refsols/tpch_q21_mysql.sql index f98980c93..302c196ca 100644 --- a/tests/test_sql_refsols/tpch_q21_mysql.sql +++ 
b/tests/test_sql_refsols/tpch_q21_mysql.sql @@ -50,23 +50,17 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL -), _t0 AS ( - SELECT - _s13.anything_l_suppkey, - ANY_VALUE(SUPPLIER.s_name) AS anything_s_name, - COUNT(*) AS n_rows - FROM tpch.SUPPLIER AS SUPPLIER - JOIN tpch.NATION AS NATION - ON NATION.n_name = 'SAUDI ARABIA' AND NATION.n_nationkey = SUPPLIER.s_nationkey - LEFT JOIN _s13 AS _s13 - ON SUPPLIER.s_suppkey = _s13.anything_l_suppkey - GROUP BY - 1 ) SELECT - anything_s_name COLLATE utf8mb4_bin AS S_NAME, - n_rows * CASE WHEN NOT anything_l_suppkey IS NULL THEN 1 ELSE 0 END AS NUMWAIT -FROM _t0 + ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, + COUNT(_s13.anything_l_suppkey) AS NUMWAIT +FROM tpch.SUPPLIER AS SUPPLIER +JOIN tpch.NATION AS NATION + ON NATION.n_name = 'SAUDI ARABIA' AND NATION.n_nationkey = SUPPLIER.s_nationkey +LEFT JOIN _s13 AS _s13 + ON SUPPLIER.s_suppkey = _s13.anything_l_suppkey +GROUP BY + SUPPLIER.s_suppkey ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/tpch_q21_postgres.sql b/tests/test_sql_refsols/tpch_q21_postgres.sql index 272f6509f..79c4527cd 100644 --- a/tests/test_sql_refsols/tpch_q21_postgres.sql +++ b/tests/test_sql_refsols/tpch_q21_postgres.sql @@ -50,23 +50,17 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL -), _t0 AS ( - SELECT - _s13.anything_l_suppkey, - MAX(supplier.s_name) AS anything_s_name, - COUNT(*) AS n_rows - FROM tpch.supplier AS supplier - JOIN tpch.nation AS nation - ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey - LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey - GROUP BY - 1 ) SELECT - anything_s_name AS S_NAME, - n_rows * CASE WHEN NOT anything_l_suppkey IS NULL THEN 1 ELSE 0 END AS NUMWAIT -FROM _t0 + MAX(supplier.s_name) AS S_NAME, + COUNT(_s13.anything_l_suppkey) AS NUMWAIT +FROM tpch.supplier AS supplier 
+JOIN tpch.nation AS nation + ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey +LEFT JOIN _s13 AS _s13 + ON _s13.anything_l_suppkey = supplier.s_suppkey +GROUP BY + supplier.s_suppkey ORDER BY 2 DESC NULLS LAST, 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q21_snowflake.sql b/tests/test_sql_refsols/tpch_q21_snowflake.sql index 7a0653c71..aa6a0445b 100644 --- a/tests/test_sql_refsols/tpch_q21_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q21_snowflake.sql @@ -50,23 +50,17 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL -), _t0 AS ( - SELECT - _s13.anything_l_suppkey, - ANY_VALUE(supplier.s_name) AS anything_s_name, - COUNT(*) AS n_rows - FROM tpch.supplier AS supplier - JOIN tpch.nation AS nation - ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey - LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey - GROUP BY - 1 ) SELECT - anything_s_name AS S_NAME, - n_rows * IFF(NOT anything_l_suppkey IS NULL, 1, 0) AS NUMWAIT -FROM _t0 + ANY_VALUE(supplier.s_name) AS S_NAME, + COUNT(_s13.anything_l_suppkey) AS NUMWAIT +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey +LEFT JOIN _s13 AS _s13 + ON _s13.anything_l_suppkey = supplier.s_suppkey +GROUP BY + supplier.s_suppkey ORDER BY 2 DESC NULLS LAST, 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q21_sqlite.sql b/tests/test_sql_refsols/tpch_q21_sqlite.sql index 813e8788d..c5ceb7d67 100644 --- a/tests/test_sql_refsols/tpch_q21_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q21_sqlite.sql @@ -50,23 +50,17 @@ WITH _t5 AS ( AND _t3.o_orderkey = _u_0._u_3 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL -), _t0 AS ( - SELECT - _s13.anything_l_suppkey, - MAX(supplier.s_name) AS anything_s_name, - COUNT(*) AS n_rows - FROM tpch.supplier AS supplier - JOIN tpch.nation AS nation 
- ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey - LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey - GROUP BY - 1 ) SELECT - anything_s_name AS S_NAME, - n_rows * IIF(NOT anything_l_suppkey IS NULL, 1, 0) AS NUMWAIT -FROM _t0 + MAX(supplier.s_name) AS S_NAME, + COUNT(_s13.anything_l_suppkey) AS NUMWAIT +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey +LEFT JOIN _s13 AS _s13 + ON _s13.anything_l_suppkey = supplier.s_suppkey +GROUP BY + supplier.s_suppkey ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/tpch_q22_ansi.sql b/tests/test_sql_refsols/tpch_q22_ansi.sql index a1663060c..04ca82b32 100644 --- a/tests/test_sql_refsols/tpch_q22_ansi.sql +++ b/tests/test_sql_refsols/tpch_q22_ansi.sql @@ -5,34 +5,17 @@ WITH _s0 AS ( WHERE SUBSTRING(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') AND c_acctbal > 0.0 -), _s3 AS ( - SELECT - o_custkey - FROM tpch.orders -), _t2 AS ( - SELECT - _s3.o_custkey, - ANY_VALUE(customer.c_acctbal) AS anything_c_acctbal, - ANY_VALUE(customer.c_phone) AS anything_c_phone, - COUNT(*) AS n_rows - FROM _s0 AS _s0 - JOIN tpch.customer AS customer - ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') - AND _s0.avg_c_acctbal < customer.c_acctbal - LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey - GROUP BY - 1 ) SELECT - SUBSTRING(anything_c_phone, 1, 2) AS CNTRY_CODE, + SUBSTRING(customer.c_phone, 1, 2) AS CNTRY_CODE, COUNT(*) AS NUM_CUSTS, - COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL -FROM _t2 -WHERE - ( - n_rows * CASE WHEN NOT o_custkey IS NULL THEN 1 ELSE 0 END - ) = 0 + COALESCE(SUM(customer.c_acctbal), 0) AS TOTACCTBAL +FROM _s0 AS _s0 +JOIN tpch.customer AS customer + ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND _s0.avg_c_acctbal < customer.c_acctbal +JOIN tpch.orders AS orders + 
ON customer.c_custkey = orders.o_custkey GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_mysql.sql b/tests/test_sql_refsols/tpch_q22_mysql.sql index 12c094ea4..66a16f143 100644 --- a/tests/test_sql_refsols/tpch_q22_mysql.sql +++ b/tests/test_sql_refsols/tpch_q22_mysql.sql @@ -5,34 +5,25 @@ WITH _s0 AS ( WHERE c_acctbal > 0.0 AND SUBSTRING(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') -), _s3 AS ( +), _u_0 AS ( SELECT - o_custkey + o_custkey AS _u_1 FROM tpch.ORDERS -), _t2 AS ( - SELECT - _s3.o_custkey, - ANY_VALUE(CUSTOMER.c_acctbal) AS anything_c_acctbal, - ANY_VALUE(CUSTOMER.c_phone) AS anything_c_phone, - COUNT(*) AS n_rows - FROM _s0 AS _s0 - JOIN tpch.CUSTOMER AS CUSTOMER - ON CUSTOMER.c_acctbal > _s0.avg_c_acctbal - AND SUBSTRING(CUSTOMER.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') - LEFT JOIN _s3 AS _s3 - ON CUSTOMER.c_custkey = _s3.o_custkey GROUP BY 1 ) SELECT - SUBSTRING(anything_c_phone, 1, 2) COLLATE utf8mb4_bin AS CNTRY_CODE, + SUBSTRING(CUSTOMER.c_phone, 1, 2) COLLATE utf8mb4_bin AS CNTRY_CODE, COUNT(*) AS NUM_CUSTS, - COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL -FROM _t2 + COALESCE(SUM(CUSTOMER.c_acctbal), 0) AS TOTACCTBAL +FROM _s0 AS _s0 +JOIN tpch.CUSTOMER AS CUSTOMER + ON CUSTOMER.c_acctbal > _s0.avg_c_acctbal + AND SUBSTRING(CUSTOMER.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') +LEFT JOIN _u_0 AS _u_0 + ON CUSTOMER.c_custkey = _u_0._u_1 WHERE - ( - n_rows * CASE WHEN NOT o_custkey IS NULL THEN 1 ELSE 0 END - ) = 0 + _u_0._u_1 IS NULL GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_postgres.sql b/tests/test_sql_refsols/tpch_q22_postgres.sql index 4e78bfbaf..c34da6644 100644 --- a/tests/test_sql_refsols/tpch_q22_postgres.sql +++ b/tests/test_sql_refsols/tpch_q22_postgres.sql @@ -5,34 +5,25 @@ WITH _s0 AS ( WHERE SUBSTRING(c_phone FROM 1 FOR 2) IN ('13', '31', '23', '29', '30', '18', '17') AND c_acctbal > 0.0 -), _s3 AS ( +), _u_0 AS ( SELECT - o_custkey + 
o_custkey AS _u_1 FROM tpch.orders -), _t2 AS ( - SELECT - _s3.o_custkey, - MAX(customer.c_acctbal) AS anything_c_acctbal, - MAX(customer.c_phone) AS anything_c_phone, - COUNT(*) AS n_rows - FROM _s0 AS _s0 - JOIN tpch.customer AS customer - ON SUBSTRING(customer.c_phone FROM 1 FOR 2) IN ('13', '31', '23', '29', '30', '18', '17') - AND _s0.avg_c_acctbal < customer.c_acctbal - LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey GROUP BY 1 ) SELECT - SUBSTRING(anything_c_phone FROM 1 FOR 2) AS CNTRY_CODE, + SUBSTRING(customer.c_phone FROM 1 FOR 2) AS CNTRY_CODE, COUNT(*) AS NUM_CUSTS, - COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL -FROM _t2 + COALESCE(SUM(customer.c_acctbal), 0) AS TOTACCTBAL +FROM _s0 AS _s0 +JOIN tpch.customer AS customer + ON SUBSTRING(customer.c_phone FROM 1 FOR 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND _s0.avg_c_acctbal < customer.c_acctbal +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey WHERE - ( - n_rows * CASE WHEN NOT o_custkey IS NULL THEN 1 ELSE 0 END - ) = 0 + _u_0._u_1 IS NULL GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_snowflake.sql b/tests/test_sql_refsols/tpch_q22_snowflake.sql index 8268a3f57..8f8901a3c 100644 --- a/tests/test_sql_refsols/tpch_q22_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q22_snowflake.sql @@ -5,34 +5,25 @@ WITH _s0 AS ( WHERE SUBSTRING(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') AND c_acctbal > 0.0 -), _s3 AS ( +), _u_0 AS ( SELECT - o_custkey + o_custkey AS _u_1 FROM tpch.orders -), _t2 AS ( - SELECT - _s3.o_custkey, - ANY_VALUE(customer.c_acctbal) AS anything_c_acctbal, - ANY_VALUE(customer.c_phone) AS anything_c_phone, - COUNT(*) AS n_rows - FROM _s0 AS _s0 - JOIN tpch.customer AS customer - ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') - AND _s0.avg_c_acctbal < customer.c_acctbal - LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey GROUP BY 1 ) SELECT - SUBSTRING(anything_c_phone, 1, 
2) AS CNTRY_CODE, + SUBSTRING(customer.c_phone, 1, 2) AS CNTRY_CODE, COUNT(*) AS NUM_CUSTS, - COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL -FROM _t2 + COALESCE(SUM(customer.c_acctbal), 0) AS TOTACCTBAL +FROM _s0 AS _s0 +JOIN tpch.customer AS customer + ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND _s0.avg_c_acctbal < customer.c_acctbal +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey WHERE - ( - n_rows * IFF(NOT o_custkey IS NULL, 1, 0) - ) = 0 + _u_0._u_1 IS NULL GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q22_sqlite.sql b/tests/test_sql_refsols/tpch_q22_sqlite.sql index fde501a05..ac8ce5c6b 100644 --- a/tests/test_sql_refsols/tpch_q22_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q22_sqlite.sql @@ -5,34 +5,25 @@ WITH _s0 AS ( WHERE SUBSTRING(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') AND c_acctbal > 0.0 -), _s3 AS ( +), _u_0 AS ( SELECT - o_custkey + o_custkey AS _u_1 FROM tpch.orders -), _t2 AS ( - SELECT - _s3.o_custkey, - MAX(customer.c_acctbal) AS anything_c_acctbal, - MAX(customer.c_phone) AS anything_c_phone, - COUNT(*) AS n_rows - FROM _s0 AS _s0 - JOIN tpch.customer AS customer - ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') - AND _s0.avg_c_acctbal < customer.c_acctbal - LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey GROUP BY 1 ) SELECT - SUBSTRING(anything_c_phone, 1, 2) AS CNTRY_CODE, + SUBSTRING(customer.c_phone, 1, 2) AS CNTRY_CODE, COUNT(*) AS NUM_CUSTS, - COALESCE(SUM(anything_c_acctbal), 0) AS TOTACCTBAL -FROM _t2 + COALESCE(SUM(customer.c_acctbal), 0) AS TOTACCTBAL +FROM _s0 AS _s0 +JOIN tpch.customer AS customer + ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND _s0.avg_c_acctbal < customer.c_acctbal +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey WHERE - ( - n_rows * IIF(NOT o_custkey IS NULL, 1, 0) - ) = 0 + _u_0._u_1 IS NULL GROUP BY 1 ORDER BY diff 
--git a/tests/test_unqualified_node.py b/tests/test_unqualified_node.py index 5d4b504da..5f2347d3e 100644 --- a/tests/test_unqualified_node.py +++ b/tests/test_unqualified_node.py @@ -478,7 +478,7 @@ def test_unqualified_to_string( ), pytest.param( impl_tpch_q22, - "TPCH.CALCULATE(global_avg_balance=AVG(customers.CALCULATE(cntry_code=SLICE(phone, None, 2, None)).WHERE(ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17'])).WHERE((account_balance > 0.0)).account_balance)).customers.CALCULATE(cntry_code=SLICE(phone, None, 2, None)).WHERE(((ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']) & (account_balance > global_avg_balance)) & (COUNT(orders) == 0))).PARTITION(name='countries', by=(cntry_code)).CALCULATE(CNTRY_CODE=cntry_code, NUM_CUSTS=COUNT(customers), TOTACCTBAL=SUM(customers.account_balance)).ORDER_BY(CNTRY_CODE.ASC(na_pos='first'))", + "TPCH.CALCULATE(global_avg_balance=AVG(customers.CALCULATE(cntry_code=SLICE(phone, None, 2, None)).WHERE(ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17'])).WHERE((account_balance > 0.0)).account_balance)).customers.CALCULATE(cntry_code=SLICE(phone, None, 2, None)).WHERE(((ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']) & (account_balance > global_avg_balance)) & HASNOT(orders))).PARTITION(name='countries', by=(cntry_code)).CALCULATE(CNTRY_CODE=cntry_code, NUM_CUSTS=COUNT(customers), TOTACCTBAL=SUM(customers.account_balance)).ORDER_BY(CNTRY_CODE.ASC(na_pos='first'))", id="tpch_q22", ), pytest.param( From def91093a49273a231be617041e99fd6544c1102 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 13:31:03 -0400 Subject: [PATCH 114/143] Changing dealership adv3 --- .../defog_test_functions.py | 5 ++-- .../defog_dealership_adv3_ansi.sql | 27 ++++++++++++------- .../defog_dealership_adv3_mysql.sql | 27 ++++++++++++------- .../defog_dealership_adv3_postgres.sql | 27 ++++++++++++------- .../defog_dealership_adv3_snowflake.sql | 27 ++++++++++++------- 
.../defog_dealership_adv3_sqlite.sql | 27 ++++++++++++------- 6 files changed, 93 insertions(+), 47 deletions(-) diff --git a/tests/test_pydough_functions/defog_test_functions.py b/tests/test_pydough_functions/defog_test_functions.py index 2b699e354..f8f12a4f1 100644 --- a/tests/test_pydough_functions/defog_test_functions.py +++ b/tests/test_pydough_functions/defog_test_functions.py @@ -727,8 +727,9 @@ def impl_defog_dealership_adv3(): model names, engine_type and vin_number, match case-insensitively and allow partial matches using LIKE with wildcards. """ - return cars.WHERE(CONTAINS(LOWER(vin_number), "m5")).CALCULATE( - make, model, num_sales=COUNT(sale_records) + selected_cars = cars.WHERE(CONTAINS(LOWER(vin_number), "m5")) + return selected_cars.PARTITION(name="car_models", by=(make, model)).CALCULATE( + make, model, num_sales=COUNT(cars.sale_records) ) diff --git a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql index a9792054c..61425e9f0 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql @@ -2,15 +2,24 @@ WITH _s1 AS ( SELECT car_id FROM main.sales +), _t0 AS ( + SELECT + ANY_VALUE(cars.make) AS anything_make, + ANY_VALUE(cars.model) AS anything_model, + COUNT(_s1.car_id) AS count_car_id + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.vin_number) LIKE '%m5%' + GROUP BY + cars._id ) SELECT - ANY_VALUE(cars.make) AS make, - ANY_VALUE(cars.model) AS model, - COUNT(_s1.car_id) AS num_sales -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.vin_number) LIKE '%m5%' + anything_make AS make, + anything_model AS model, + SUM(count_car_id) AS num_sales +FROM _t0 GROUP BY - cars._id + 1, + 2 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql index a9792054c..61425e9f0 
100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql @@ -2,15 +2,24 @@ WITH _s1 AS ( SELECT car_id FROM main.sales +), _t0 AS ( + SELECT + ANY_VALUE(cars.make) AS anything_make, + ANY_VALUE(cars.model) AS anything_model, + COUNT(_s1.car_id) AS count_car_id + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.vin_number) LIKE '%m5%' + GROUP BY + cars._id ) SELECT - ANY_VALUE(cars.make) AS make, - ANY_VALUE(cars.model) AS model, - COUNT(_s1.car_id) AS num_sales -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.vin_number) LIKE '%m5%' + anything_make AS make, + anything_model AS model, + SUM(count_car_id) AS num_sales +FROM _t0 GROUP BY - cars._id + 1, + 2 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql index 64026756e..73ade4a2b 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql @@ -2,15 +2,24 @@ WITH _s1 AS ( SELECT car_id FROM main.sales +), _t0 AS ( + SELECT + MAX(cars.make) AS anything_make, + MAX(cars.model) AS anything_model, + COUNT(_s1.car_id) AS count_car_id + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.vin_number) LIKE '%m5%' + GROUP BY + cars._id ) SELECT - MAX(cars.make) AS make, - MAX(cars.model) AS model, - COUNT(_s1.car_id) AS num_sales -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.vin_number) LIKE '%m5%' + anything_make AS make, + anything_model AS model, + SUM(count_car_id) AS num_sales +FROM _t0 GROUP BY - cars._id + 1, + 2 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql index a88aaf0c0..0b0f18d90 100644 --- 
a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql @@ -2,15 +2,24 @@ WITH _s1 AS ( SELECT car_id FROM main.sales +), _t0 AS ( + SELECT + ANY_VALUE(cars.make) AS anything_make, + ANY_VALUE(cars.model) AS anything_model, + COUNT(_s1.car_id) AS count_car_id + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + CONTAINS(LOWER(cars.vin_number), 'm5') + GROUP BY + cars._id ) SELECT - ANY_VALUE(cars.make) AS make, - ANY_VALUE(cars.model) AS model, - COUNT(_s1.car_id) AS num_sales -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - CONTAINS(LOWER(cars.vin_number), 'm5') + anything_make AS make, + anything_model AS model, + SUM(count_car_id) AS num_sales +FROM _t0 GROUP BY - cars._id + 1, + 2 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql index 64026756e..73ade4a2b 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql @@ -2,15 +2,24 @@ WITH _s1 AS ( SELECT car_id FROM main.sales +), _t0 AS ( + SELECT + MAX(cars.make) AS anything_make, + MAX(cars.model) AS anything_model, + COUNT(_s1.car_id) AS count_car_id + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + WHERE + LOWER(cars.vin_number) LIKE '%m5%' + GROUP BY + cars._id ) SELECT - MAX(cars.make) AS make, - MAX(cars.model) AS model, - COUNT(_s1.car_id) AS num_sales -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id -WHERE - LOWER(cars.vin_number) LIKE '%m5%' + anything_make AS make, + anything_model AS model, + SUM(count_car_id) AS num_sales +FROM _t0 GROUP BY - cars._id + 1, + 2 From 0bae74c6d6467b0105f3a15d489e57f0657ba778 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 13:51:04 -0400 Subject: [PATCH 115/143] Updating academia tests --- .../defog_academic_gen15_ansi.sql 
| 20 +++++++++++-------- .../defog_academic_gen15_mysql.sql | 20 +++++++++++-------- .../defog_academic_gen15_postgres.sql | 20 +++++++++++-------- .../defog_academic_gen15_snowflake.sql | 20 +++++++++++-------- .../defog_academic_gen15_sqlite.sql | 20 +++++++++++-------- .../defog_academic_gen17_ansi.sql | 11 +++++----- .../defog_academic_gen17_mysql.sql | 11 +++++----- .../defog_academic_gen17_postgres.sql | 11 +++++----- .../defog_academic_gen17_snowflake.sql | 11 +++++----- .../defog_academic_gen17_sqlite.sql | 11 +++++----- .../defog_academic_gen19_ansi.sql | 11 +++++----- .../defog_academic_gen19_mysql.sql | 11 +++++----- .../defog_academic_gen19_postgres.sql | 11 +++++----- .../defog_academic_gen19_snowflake.sql | 11 +++++----- .../defog_academic_gen19_sqlite.sql | 11 +++++----- 15 files changed, 110 insertions(+), 100 deletions(-) diff --git a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql index 8aec67c5a..77cf19d0e 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql @@ -1,17 +1,21 @@ WITH _s1 AS ( SELECT - oid, - COUNT(*) AS n_rows + oid FROM main.author +), _t1 AS ( + SELECT + ANY_VALUE(organization.continent) AS anything_continent, + COUNT(_s1.oid) AS count_oid + FROM main.organization AS organization + LEFT JOIN _s1 AS _s1 + ON _s1.oid = organization.oid GROUP BY - 1 + organization.oid ) SELECT - organization.continent, - COALESCE(SUM(_s1.n_rows), 0) / COUNT(*) AS ratio -FROM main.organization AS organization -LEFT JOIN _s1 AS _s1 - ON _s1.oid = organization.oid + anything_continent AS continent, + SUM(count_oid) / COUNT(*) AS ratio +FROM _t1 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql index 8aec67c5a..77cf19d0e 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql +++ 
b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql @@ -1,17 +1,21 @@ WITH _s1 AS ( SELECT - oid, - COUNT(*) AS n_rows + oid FROM main.author +), _t1 AS ( + SELECT + ANY_VALUE(organization.continent) AS anything_continent, + COUNT(_s1.oid) AS count_oid + FROM main.organization AS organization + LEFT JOIN _s1 AS _s1 + ON _s1.oid = organization.oid GROUP BY - 1 + organization.oid ) SELECT - organization.continent, - COALESCE(SUM(_s1.n_rows), 0) / COUNT(*) AS ratio -FROM main.organization AS organization -LEFT JOIN _s1 AS _s1 - ON _s1.oid = organization.oid + anything_continent AS continent, + SUM(count_oid) / COUNT(*) AS ratio +FROM _t1 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql index 0062172b1..33e71aa40 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql @@ -1,17 +1,21 @@ WITH _s1 AS ( SELECT - oid, - COUNT(*) AS n_rows + oid FROM main.author +), _t1 AS ( + SELECT + MAX(organization.continent) AS anything_continent, + COUNT(_s1.oid) AS count_oid + FROM main.organization AS organization + LEFT JOIN _s1 AS _s1 + ON _s1.oid = organization.oid GROUP BY - 1 + organization.oid ) SELECT - organization.continent, - CAST(COALESCE(SUM(_s1.n_rows), 0) AS DOUBLE PRECISION) / COUNT(*) AS ratio -FROM main.organization AS organization -LEFT JOIN _s1 AS _s1 - ON _s1.oid = organization.oid + anything_continent AS continent, + CAST(SUM(count_oid) AS DOUBLE PRECISION) / COUNT(*) AS ratio +FROM _t1 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql index bef6f834f..b1767f69f 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql @@ -1,17 +1,21 @@ WITH _s1 AS ( SELECT - oid, - COUNT(*) AS n_rows + oid FROM main.author +), 
_t1 AS ( + SELECT + ANY_VALUE(organization.continent) AS anything_continent, + COUNT(_s1.oid) AS count_oid + FROM main.organization AS organization + LEFT JOIN _s1 AS _s1 + ON _s1.oid = organization.oid GROUP BY - 1 + organization.oid ) SELECT - organization.continent, - COALESCE(SUM(_s1.n_rows), 0) / COUNT(*) AS ratio -FROM main.organization AS organization -LEFT JOIN _s1 AS _s1 - ON _s1.oid = organization.oid + anything_continent AS continent, + SUM(count_oid) / COUNT(*) AS ratio +FROM _t1 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql index 6d72e6c55..0e819c397 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql @@ -1,17 +1,21 @@ WITH _s1 AS ( SELECT - oid, - COUNT(*) AS n_rows + oid FROM main.author +), _t1 AS ( + SELECT + MAX(organization.continent) AS anything_continent, + COUNT(_s1.oid) AS count_oid + FROM main.organization AS organization + LEFT JOIN _s1 AS _s1 + ON _s1.oid = organization.oid GROUP BY - 1 + organization.oid ) SELECT - organization.continent, - CAST(COALESCE(SUM(_s1.n_rows), 0) AS REAL) / COUNT(*) AS ratio -FROM main.organization AS organization -LEFT JOIN _s1 AS _s1 - ON _s1.oid = organization.oid + anything_continent AS continent, + CAST(SUM(count_oid) AS REAL) / COUNT(*) AS ratio +FROM _t1 GROUP BY 1 ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql index f0641b802..b5ef1f4fd 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - cid, - COUNT(*) AS n_rows + cid FROM main.publication - GROUP BY - 1 ) SELECT - conference.name, - COALESCE(_s1.n_rows, 0) AS count_publications + ANY_VALUE(conference.name) AS name, + COUNT(_s1.cid) AS count_publications FROM main.conference AS 
conference LEFT JOIN _s1 AS _s1 ON _s1.cid = conference.cid +GROUP BY + conference.cid ORDER BY 2 DESC, 1 DESC diff --git a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql index d5b70427f..e9ff73f69 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - cid, - COUNT(*) AS n_rows + cid FROM main.publication - GROUP BY - 1 ) SELECT - name COLLATE utf8mb4_bin AS name, - COALESCE(_s1.n_rows, 0) AS count_publications + ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, + COUNT(_s1.cid) AS count_publications FROM main.conference AS conference LEFT JOIN _s1 AS _s1 ON _s1.cid = conference.cid +GROUP BY + conference.cid ORDER BY 2 DESC, 1 DESC diff --git a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql index 4339e1f73..e2f50fde0 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - cid, - COUNT(*) AS n_rows + cid FROM main.publication - GROUP BY - 1 ) SELECT - conference.name, - COALESCE(_s1.n_rows, 0) AS count_publications + MAX(conference.name) AS name, + COUNT(_s1.cid) AS count_publications FROM main.conference AS conference LEFT JOIN _s1 AS _s1 ON _s1.cid = conference.cid +GROUP BY + conference.cid ORDER BY 2 DESC NULLS LAST, 1 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql index 4339e1f73..519888bc7 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - cid, - COUNT(*) AS n_rows + cid FROM main.publication - GROUP BY - 1 ) SELECT - conference.name, - COALESCE(_s1.n_rows, 0) AS 
count_publications + ANY_VALUE(conference.name) AS name, + COUNT(_s1.cid) AS count_publications FROM main.conference AS conference LEFT JOIN _s1 AS _s1 ON _s1.cid = conference.cid +GROUP BY + conference.cid ORDER BY 2 DESC NULLS LAST, 1 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql index f0641b802..0504ad5e3 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - cid, - COUNT(*) AS n_rows + cid FROM main.publication - GROUP BY - 1 ) SELECT - conference.name, - COALESCE(_s1.n_rows, 0) AS count_publications + MAX(conference.name) AS name, + COUNT(_s1.cid) AS count_publications FROM main.conference AS conference LEFT JOIN _s1 AS _s1 ON _s1.cid = conference.cid +GROUP BY + conference.cid ORDER BY 2 DESC, 1 DESC diff --git a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql index fdcb1ec05..50870e1ee 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - cid, - COUNT(*) AS n_rows + cid FROM main.publication - GROUP BY - 1 ) SELECT - conference.name, - COALESCE(_s1.n_rows, 0) AS num_publications + ANY_VALUE(conference.name) AS name, + COUNT(_s1.cid) AS num_publications FROM main.conference AS conference LEFT JOIN _s1 AS _s1 ON _s1.cid = conference.cid +GROUP BY + conference.cid ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql index 3677a3657..8d051c160 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - cid, - COUNT(*) AS n_rows + cid FROM main.publication - GROUP BY - 1 ) SELECT - name COLLATE 
utf8mb4_bin AS name, - COALESCE(_s1.n_rows, 0) AS num_publications + ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, + COUNT(_s1.cid) AS num_publications FROM main.conference AS conference LEFT JOIN _s1 AS _s1 ON _s1.cid = conference.cid +GROUP BY + conference.cid ORDER BY 2 DESC, 1 diff --git a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql index a55f1326e..450cbfea6 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - cid, - COUNT(*) AS n_rows + cid FROM main.publication - GROUP BY - 1 ) SELECT - conference.name, - COALESCE(_s1.n_rows, 0) AS num_publications + MAX(conference.name) AS name, + COUNT(_s1.cid) AS num_publications FROM main.conference AS conference LEFT JOIN _s1 AS _s1 ON _s1.cid = conference.cid +GROUP BY + conference.cid ORDER BY 2 DESC NULLS LAST, 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql index a55f1326e..ada8fbc3f 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql @@ -1,17 +1,16 @@ WITH _s1 AS ( SELECT - cid, - COUNT(*) AS n_rows + cid FROM main.publication - GROUP BY - 1 ) SELECT - conference.name, - COALESCE(_s1.n_rows, 0) AS num_publications + ANY_VALUE(conference.name) AS name, + COUNT(_s1.cid) AS num_publications FROM main.conference AS conference LEFT JOIN _s1 AS _s1 ON _s1.cid = conference.cid +GROUP BY + conference.cid ORDER BY 2 DESC NULLS LAST, 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql index fdcb1ec05..931325963 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql @@ -1,17 +1,16 @@ WITH _s1 
AS ( SELECT - cid, - COUNT(*) AS n_rows + cid FROM main.publication - GROUP BY - 1 ) SELECT - conference.name, - COALESCE(_s1.n_rows, 0) AS num_publications + MAX(conference.name) AS name, + COUNT(_s1.cid) AS num_publications FROM main.conference AS conference LEFT JOIN _s1 AS _s1 ON _s1.cid = conference.cid +GROUP BY + conference.cid ORDER BY 2 DESC, 1 From b7fa6e4bb2df3a0d0f0f0509ddc2357590c9945b Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 13:56:07 -0400 Subject: [PATCH 116/143] Updating left join merge rule --- pydough/sqlglot/override_merge_subqueries.py | 6 +++++- .../conditional_functions_ansi.sql | 16 +++++----------- .../conditional_functions_mysql.sql | 16 +++++----------- .../conditional_functions_postgres.sql | 16 +++++----------- .../conditional_functions_snowflake.sql | 16 +++++----------- .../conditional_functions_sqlite.sql | 16 +++++----------- tests/test_sql_refsols/correl_8_sqlite.sql | 15 ++++----------- .../defog_academic_gen15_ansi.sql | 12 ++++-------- .../defog_academic_gen15_mysql.sql | 12 ++++-------- .../defog_academic_gen15_postgres.sql | 12 ++++-------- .../defog_academic_gen15_snowflake.sql | 12 ++++-------- .../defog_academic_gen15_sqlite.sql | 12 ++++-------- .../defog_academic_gen17_ansi.sql | 11 +++-------- .../defog_academic_gen17_mysql.sql | 11 +++-------- .../defog_academic_gen17_postgres.sql | 11 +++-------- .../defog_academic_gen17_snowflake.sql | 11 +++-------- .../defog_academic_gen17_sqlite.sql | 11 +++-------- .../defog_academic_gen19_ansi.sql | 11 +++-------- .../defog_academic_gen19_mysql.sql | 11 +++-------- .../defog_academic_gen19_postgres.sql | 11 +++-------- .../defog_academic_gen19_snowflake.sql | 11 +++-------- .../defog_academic_gen19_sqlite.sql | 11 +++-------- .../defog_dealership_adv3_ansi.sql | 12 ++++-------- .../defog_dealership_adv3_mysql.sql | 12 ++++-------- .../defog_dealership_adv3_postgres.sql | 12 ++++-------- .../defog_dealership_adv3_snowflake.sql | 12 ++++-------- 
.../defog_dealership_adv3_sqlite.sql | 12 ++++-------- .../defog_dealership_adv6_ansi.sql | 11 +++-------- .../defog_dealership_adv6_mysql.sql | 11 +++-------- .../defog_dealership_adv6_postgres.sql | 11 +++-------- .../defog_dealership_adv6_snowflake.sql | 11 +++-------- .../defog_dealership_adv6_sqlite.sql | 11 +++-------- .../defog_dermtreatment_adv5_ansi.sql | 13 ++++--------- .../defog_dermtreatment_adv5_mysql.sql | 11 +++-------- .../defog_dermtreatment_adv5_postgres.sql | 11 +++-------- .../defog_dermtreatment_adv5_snowflake.sql | 11 +++-------- .../defog_dermtreatment_adv5_sqlite.sql | 11 +++-------- .../defog_dermtreatment_gen2_ansi.sql | 14 ++++---------- .../defog_dermtreatment_gen2_snowflake.sql | 14 ++++---------- .../defog_ewallet_adv12_ansi.sql | 12 +++--------- .../defog_ewallet_adv12_mysql.sql | 12 +++--------- .../defog_ewallet_adv12_postgres.sql | 12 +++--------- .../defog_ewallet_adv12_snowflake.sql | 12 +++--------- .../defog_ewallet_adv12_sqlite.sql | 12 +++--------- .../defog_ewallet_basic8_ansi.sql | 15 ++++----------- .../defog_ewallet_basic8_mysql.sql | 15 ++++----------- .../defog_ewallet_basic8_postgres.sql | 15 ++++----------- .../defog_ewallet_basic8_snowflake.sql | 15 ++++----------- .../defog_ewallet_basic8_sqlite.sql | 15 ++++----------- .../defog_ewallet_basic9_ansi.sql | 12 +++--------- .../defog_ewallet_basic9_mysql.sql | 12 +++--------- .../defog_ewallet_basic9_postgres.sql | 12 +++--------- .../defog_ewallet_basic9_snowflake.sql | 12 +++--------- .../defog_ewallet_basic9_sqlite.sql | 12 +++--------- .../test_sql_refsols/defog_ewallet_gen4_ansi.sql | 12 +++--------- .../defog_ewallet_gen4_mysql.sql | 12 +++--------- .../defog_ewallet_gen4_postgres.sql | 12 +++--------- .../defog_ewallet_gen4_snowflake.sql | 12 +++--------- .../defog_ewallet_gen4_sqlite.sql | 12 +++--------- ...nograph_country_combination_analysis_ansi.sql | 10 +++------- ...ograph_country_combination_analysis_mysql.sql | 10 +++------- 
...aph_country_combination_analysis_postgres.sql | 10 +++------- ...ph_country_combination_analysis_snowflake.sql | 10 +++------- ...graph_country_combination_analysis_sqlite.sql | 10 +++------- ...rror_rate_sun_set_by_factory_country_ansi.sql | 12 ++++-------- ...ror_rate_sun_set_by_factory_country_mysql.sql | 12 ++++-------- ..._rate_sun_set_by_factory_country_postgres.sql | 12 ++++-------- ...rate_sun_set_by_factory_country_snowflake.sql | 12 ++++-------- ...or_rate_sun_set_by_factory_country_sqlite.sql | 12 ++++-------- 69 files changed, 243 insertions(+), 593 deletions(-) diff --git a/pydough/sqlglot/override_merge_subqueries.py b/pydough/sqlglot/override_merge_subqueries.py index 3cfb4d85e..ec0062af3 100644 --- a/pydough/sqlglot/override_merge_subqueries.py +++ b/pydough/sqlglot/override_merge_subqueries.py @@ -199,7 +199,11 @@ def _mergeable( """ # PYDOUGH CHANGE: avoid merging CTEs when it would break a left join. - if isinstance(from_or_join, exp.Join) and from_or_join.side not in ("INNER", ""): + if ( + isinstance(from_or_join, exp.Join) + and from_or_join.side not in ("INNER", "") + and len(inner_scope.expression.args.get("joins", [])) > 0 + ): return False # PYDOUGH CHANGE: avoid merging CTEs when the inner scope has a window diff --git a/tests/test_sql_refsols/conditional_functions_ansi.sql b/tests/test_sql_refsols/conditional_functions_ansi.sql index fa71725a1..f17079889 100644 --- a/tests/test_sql_refsols/conditional_functions_ansi.sql +++ b/tests/test_sql_refsols/conditional_functions_ansi.sql @@ -1,23 +1,17 @@ -WITH _s1 AS ( - SELECT - o_custkey, - o_totalprice - FROM tpch.orders -) SELECT CASE WHEN ANY_VALUE(customer.c_acctbal) > 1000 THEN 'High' ELSE 'Low' END AS iff_col, ANY_VALUE(customer.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, - COALESCE(MIN(_s1.o_totalprice), 0.0) AS default_val, - NOT MIN(_s1.o_totalprice) IS NULL AS has_acct_bal, - MIN(_s1.o_totalprice) IS NULL AS no_acct_bal, + COALESCE(MIN(orders.o_totalprice), 0.0) AS 
default_val, + NOT MIN(orders.o_totalprice) IS NULL AS has_acct_bal, + MIN(orders.o_totalprice) IS NULL AS no_acct_bal, CASE WHEN ANY_VALUE(customer.c_acctbal) > 0 THEN ANY_VALUE(customer.c_acctbal) ELSE NULL END AS no_debt_bal FROM tpch.customer AS customer -LEFT JOIN _s1 AS _s1 - ON _s1.o_custkey = customer.c_custkey +LEFT JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 GROUP BY diff --git a/tests/test_sql_refsols/conditional_functions_mysql.sql b/tests/test_sql_refsols/conditional_functions_mysql.sql index 3aabbf468..b41dd6053 100644 --- a/tests/test_sql_refsols/conditional_functions_mysql.sql +++ b/tests/test_sql_refsols/conditional_functions_mysql.sql @@ -1,23 +1,17 @@ -WITH _s1 AS ( - SELECT - o_custkey, - o_totalprice - FROM tpch.ORDERS -) SELECT CASE WHEN ANY_VALUE(CUSTOMER.c_acctbal) > 1000 THEN 'High' ELSE 'Low' END AS iff_col, ANY_VALUE(CUSTOMER.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, - COALESCE(MIN(_s1.o_totalprice), 0.0) AS default_val, - NOT MIN(_s1.o_totalprice) IS NULL AS has_acct_bal, - MIN(_s1.o_totalprice) IS NULL AS no_acct_bal, + COALESCE(MIN(ORDERS.o_totalprice), 0.0) AS default_val, + NOT MIN(ORDERS.o_totalprice) IS NULL AS has_acct_bal, + MIN(ORDERS.o_totalprice) IS NULL AS no_acct_bal, CASE WHEN ANY_VALUE(CUSTOMER.c_acctbal) > 0 THEN ANY_VALUE(CUSTOMER.c_acctbal) ELSE NULL END AS no_debt_bal FROM tpch.CUSTOMER AS CUSTOMER -LEFT JOIN _s1 AS _s1 - ON CUSTOMER.c_custkey = _s1.o_custkey +LEFT JOIN tpch.ORDERS AS ORDERS + ON CUSTOMER.c_custkey = ORDERS.o_custkey WHERE CUSTOMER.c_acctbal <= 1000 AND CUSTOMER.c_acctbal >= 100 GROUP BY diff --git a/tests/test_sql_refsols/conditional_functions_postgres.sql b/tests/test_sql_refsols/conditional_functions_postgres.sql index bd9ee0f8a..e6ed56574 100644 --- a/tests/test_sql_refsols/conditional_functions_postgres.sql +++ b/tests/test_sql_refsols/conditional_functions_postgres.sql @@ -1,19 +1,13 @@ -WITH _s1 AS 
( - SELECT - o_custkey, - o_totalprice - FROM tpch.orders -) SELECT CASE WHEN MAX(customer.c_acctbal) > 1000 THEN 'High' ELSE 'Low' END AS iff_col, MAX(customer.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, - COALESCE(MIN(_s1.o_totalprice), 0.0) AS default_val, - NOT MIN(_s1.o_totalprice) IS NULL AS has_acct_bal, - MIN(_s1.o_totalprice) IS NULL AS no_acct_bal, + COALESCE(MIN(orders.o_totalprice), 0.0) AS default_val, + NOT MIN(orders.o_totalprice) IS NULL AS has_acct_bal, + MIN(orders.o_totalprice) IS NULL AS no_acct_bal, CASE WHEN MAX(customer.c_acctbal) > 0 THEN MAX(customer.c_acctbal) ELSE NULL END AS no_debt_bal FROM tpch.customer AS customer -LEFT JOIN _s1 AS _s1 - ON _s1.o_custkey = customer.c_custkey +LEFT JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 GROUP BY diff --git a/tests/test_sql_refsols/conditional_functions_snowflake.sql b/tests/test_sql_refsols/conditional_functions_snowflake.sql index 56a55ff92..4260b4e22 100644 --- a/tests/test_sql_refsols/conditional_functions_snowflake.sql +++ b/tests/test_sql_refsols/conditional_functions_snowflake.sql @@ -1,23 +1,17 @@ -WITH _s1 AS ( - SELECT - o_custkey, - o_totalprice - FROM tpch.orders -) SELECT IFF(ANY_VALUE(customer.c_acctbal) > 1000, 'High', 'Low') AS iff_col, ANY_VALUE(customer.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, - COALESCE(MIN(_s1.o_totalprice), 0.0) AS default_val, - NOT MIN(_s1.o_totalprice) IS NULL AS has_acct_bal, - MIN(_s1.o_totalprice) IS NULL AS no_acct_bal, + COALESCE(MIN(orders.o_totalprice), 0.0) AS default_val, + NOT MIN(orders.o_totalprice) IS NULL AS has_acct_bal, + MIN(orders.o_totalprice) IS NULL AS no_acct_bal, CASE WHEN ANY_VALUE(customer.c_acctbal) > 0 THEN ANY_VALUE(customer.c_acctbal) ELSE NULL END AS no_debt_bal FROM tpch.customer AS customer -LEFT JOIN _s1 AS _s1 - ON _s1.o_custkey = customer.c_custkey +LEFT JOIN tpch.orders AS orders + ON customer.c_custkey = 
orders.o_custkey WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 GROUP BY diff --git a/tests/test_sql_refsols/conditional_functions_sqlite.sql b/tests/test_sql_refsols/conditional_functions_sqlite.sql index e4a5e7f45..5690691ad 100644 --- a/tests/test_sql_refsols/conditional_functions_sqlite.sql +++ b/tests/test_sql_refsols/conditional_functions_sqlite.sql @@ -1,19 +1,13 @@ -WITH _s1 AS ( - SELECT - o_custkey, - o_totalprice - FROM tpch.orders -) SELECT IIF(MAX(customer.c_acctbal) > 1000, 'High', 'Low') AS iff_col, MAX(customer.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, - COALESCE(MIN(_s1.o_totalprice), 0.0) AS default_val, - NOT MIN(_s1.o_totalprice) IS NULL AS has_acct_bal, - MIN(_s1.o_totalprice) IS NULL AS no_acct_bal, + COALESCE(MIN(orders.o_totalprice), 0.0) AS default_val, + NOT MIN(orders.o_totalprice) IS NULL AS has_acct_bal, + MIN(orders.o_totalprice) IS NULL AS no_acct_bal, CASE WHEN MAX(customer.c_acctbal) > 0 THEN MAX(customer.c_acctbal) ELSE NULL END AS no_debt_bal FROM tpch.customer AS customer -LEFT JOIN _s1 AS _s1 - ON _s1.o_custkey = customer.c_custkey +LEFT JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey WHERE customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 GROUP BY diff --git a/tests/test_sql_refsols/correl_8_sqlite.sql b/tests/test_sql_refsols/correl_8_sqlite.sql index 06962dcb4..856108a52 100644 --- a/tests/test_sql_refsols/correl_8_sqlite.sql +++ b/tests/test_sql_refsols/correl_8_sqlite.sql @@ -1,16 +1,9 @@ -WITH _s1 AS ( - SELECT - SUBSTRING(r_name, 1, 1) AS expr_0, - r_name, - r_regionkey - FROM tpch.region -) SELECT nation.n_name AS name, - _s1.r_name AS rname + region.r_name AS rname FROM tpch.nation AS nation -LEFT JOIN _s1 AS _s1 - ON _s1.expr_0 = SUBSTRING(nation.n_name, 1, 1) - AND _s1.r_regionkey = nation.n_regionkey +LEFT JOIN tpch.region AS region + ON SUBSTRING(nation.n_name, 1, 1) = SUBSTRING(region.r_name, 1, 1) + AND nation.n_regionkey = region.r_regionkey ORDER BY 1 diff 
--git a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql index 77cf19d0e..7c25d6cf4 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql @@ -1,14 +1,10 @@ -WITH _s1 AS ( - SELECT - oid - FROM main.author -), _t1 AS ( +WITH _t1 AS ( SELECT ANY_VALUE(organization.continent) AS anything_continent, - COUNT(_s1.oid) AS count_oid + COUNT(author.oid) AS count_oid FROM main.organization AS organization - LEFT JOIN _s1 AS _s1 - ON _s1.oid = organization.oid + LEFT JOIN main.author AS author + ON author.oid = organization.oid GROUP BY organization.oid ) diff --git a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql index 77cf19d0e..7c25d6cf4 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql @@ -1,14 +1,10 @@ -WITH _s1 AS ( - SELECT - oid - FROM main.author -), _t1 AS ( +WITH _t1 AS ( SELECT ANY_VALUE(organization.continent) AS anything_continent, - COUNT(_s1.oid) AS count_oid + COUNT(author.oid) AS count_oid FROM main.organization AS organization - LEFT JOIN _s1 AS _s1 - ON _s1.oid = organization.oid + LEFT JOIN main.author AS author + ON author.oid = organization.oid GROUP BY organization.oid ) diff --git a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql index 33e71aa40..51097fc92 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql @@ -1,14 +1,10 @@ -WITH _s1 AS ( - SELECT - oid - FROM main.author -), _t1 AS ( +WITH _t1 AS ( SELECT MAX(organization.continent) AS anything_continent, - COUNT(_s1.oid) AS count_oid + COUNT(author.oid) AS count_oid FROM main.organization AS organization - LEFT JOIN _s1 AS _s1 - ON _s1.oid = organization.oid + LEFT JOIN main.author AS 
author + ON author.oid = organization.oid GROUP BY organization.oid ) diff --git a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql index b1767f69f..d90d873a6 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql @@ -1,14 +1,10 @@ -WITH _s1 AS ( - SELECT - oid - FROM main.author -), _t1 AS ( +WITH _t1 AS ( SELECT ANY_VALUE(organization.continent) AS anything_continent, - COUNT(_s1.oid) AS count_oid + COUNT(author.oid) AS count_oid FROM main.organization AS organization - LEFT JOIN _s1 AS _s1 - ON _s1.oid = organization.oid + LEFT JOIN main.author AS author + ON author.oid = organization.oid GROUP BY organization.oid ) diff --git a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql index 0e819c397..6dca6a500 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql @@ -1,14 +1,10 @@ -WITH _s1 AS ( - SELECT - oid - FROM main.author -), _t1 AS ( +WITH _t1 AS ( SELECT MAX(organization.continent) AS anything_continent, - COUNT(_s1.oid) AS count_oid + COUNT(author.oid) AS count_oid FROM main.organization AS organization - LEFT JOIN _s1 AS _s1 - ON _s1.oid = organization.oid + LEFT JOIN main.author AS author + ON author.oid = organization.oid GROUP BY organization.oid ) diff --git a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql index b5ef1f4fd..504cb2598 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql @@ -1,14 +1,9 @@ -WITH _s1 AS ( - SELECT - cid - FROM main.publication -) SELECT ANY_VALUE(conference.name) AS name, - COUNT(_s1.cid) AS count_publications + COUNT(publication.cid) AS count_publications FROM main.conference AS conference -LEFT JOIN _s1 AS _s1 - 
ON _s1.cid = conference.cid +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid GROUP BY conference.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql index e9ff73f69..b8207083a 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql @@ -1,14 +1,9 @@ -WITH _s1 AS ( - SELECT - cid - FROM main.publication -) SELECT ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, - COUNT(_s1.cid) AS count_publications + COUNT(publication.cid) AS count_publications FROM main.conference AS conference -LEFT JOIN _s1 AS _s1 - ON _s1.cid = conference.cid +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid GROUP BY conference.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql index e2f50fde0..4635122b3 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql @@ -1,14 +1,9 @@ -WITH _s1 AS ( - SELECT - cid - FROM main.publication -) SELECT MAX(conference.name) AS name, - COUNT(_s1.cid) AS count_publications + COUNT(publication.cid) AS count_publications FROM main.conference AS conference -LEFT JOIN _s1 AS _s1 - ON _s1.cid = conference.cid +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid GROUP BY conference.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql index 519888bc7..a4ef4321b 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql @@ -1,14 +1,9 @@ -WITH _s1 AS ( - SELECT - cid - FROM main.publication -) SELECT ANY_VALUE(conference.name) AS name, - COUNT(_s1.cid) AS count_publications + 
COUNT(publication.cid) AS count_publications FROM main.conference AS conference -LEFT JOIN _s1 AS _s1 - ON _s1.cid = conference.cid +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid GROUP BY conference.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql index 0504ad5e3..492220fdb 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql @@ -1,14 +1,9 @@ -WITH _s1 AS ( - SELECT - cid - FROM main.publication -) SELECT MAX(conference.name) AS name, - COUNT(_s1.cid) AS count_publications + COUNT(publication.cid) AS count_publications FROM main.conference AS conference -LEFT JOIN _s1 AS _s1 - ON _s1.cid = conference.cid +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid GROUP BY conference.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql index 50870e1ee..38b7fbcc1 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql @@ -1,14 +1,9 @@ -WITH _s1 AS ( - SELECT - cid - FROM main.publication -) SELECT ANY_VALUE(conference.name) AS name, - COUNT(_s1.cid) AS num_publications + COUNT(publication.cid) AS num_publications FROM main.conference AS conference -LEFT JOIN _s1 AS _s1 - ON _s1.cid = conference.cid +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid GROUP BY conference.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql index 8d051c160..846c76c9d 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql @@ -1,14 +1,9 @@ -WITH _s1 AS ( - SELECT - cid - FROM main.publication -) SELECT ANY_VALUE(conference.name) COLLATE 
utf8mb4_bin AS name, - COUNT(_s1.cid) AS num_publications + COUNT(publication.cid) AS num_publications FROM main.conference AS conference -LEFT JOIN _s1 AS _s1 - ON _s1.cid = conference.cid +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid GROUP BY conference.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql index 450cbfea6..4b7b9b29b 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql @@ -1,14 +1,9 @@ -WITH _s1 AS ( - SELECT - cid - FROM main.publication -) SELECT MAX(conference.name) AS name, - COUNT(_s1.cid) AS num_publications + COUNT(publication.cid) AS num_publications FROM main.conference AS conference -LEFT JOIN _s1 AS _s1 - ON _s1.cid = conference.cid +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid GROUP BY conference.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql index ada8fbc3f..5cfa8fc98 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql @@ -1,14 +1,9 @@ -WITH _s1 AS ( - SELECT - cid - FROM main.publication -) SELECT ANY_VALUE(conference.name) AS name, - COUNT(_s1.cid) AS num_publications + COUNT(publication.cid) AS num_publications FROM main.conference AS conference -LEFT JOIN _s1 AS _s1 - ON _s1.cid = conference.cid +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid GROUP BY conference.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql index 931325963..4605e9446 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql @@ -1,14 +1,9 @@ -WITH _s1 AS ( - 
SELECT - cid - FROM main.publication -) SELECT MAX(conference.name) AS name, - COUNT(_s1.cid) AS num_publications + COUNT(publication.cid) AS num_publications FROM main.conference AS conference -LEFT JOIN _s1 AS _s1 - ON _s1.cid = conference.cid +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid GROUP BY conference.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql index 61425e9f0..1ba9a3495 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql @@ -1,15 +1,11 @@ -WITH _s1 AS ( - SELECT - car_id - FROM main.sales -), _t0 AS ( +WITH _t0 AS ( SELECT ANY_VALUE(cars.make) AS anything_make, ANY_VALUE(cars.model) AS anything_model, - COUNT(_s1.car_id) AS count_car_id + COUNT(sales.car_id) AS count_car_id FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id + LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id WHERE LOWER(cars.vin_number) LIKE '%m5%' GROUP BY diff --git a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql index 61425e9f0..1ba9a3495 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql @@ -1,15 +1,11 @@ -WITH _s1 AS ( - SELECT - car_id - FROM main.sales -), _t0 AS ( +WITH _t0 AS ( SELECT ANY_VALUE(cars.make) AS anything_make, ANY_VALUE(cars.model) AS anything_model, - COUNT(_s1.car_id) AS count_car_id + COUNT(sales.car_id) AS count_car_id FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id + LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id WHERE LOWER(cars.vin_number) LIKE '%m5%' GROUP BY diff --git a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql index 73ade4a2b..37b05b43d 100644 --- 
a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql @@ -1,15 +1,11 @@ -WITH _s1 AS ( - SELECT - car_id - FROM main.sales -), _t0 AS ( +WITH _t0 AS ( SELECT MAX(cars.make) AS anything_make, MAX(cars.model) AS anything_model, - COUNT(_s1.car_id) AS count_car_id + COUNT(sales.car_id) AS count_car_id FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id + LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id WHERE LOWER(cars.vin_number) LIKE '%m5%' GROUP BY diff --git a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql index 0b0f18d90..7c13d9c3b 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql @@ -1,15 +1,11 @@ -WITH _s1 AS ( - SELECT - car_id - FROM main.sales -), _t0 AS ( +WITH _t0 AS ( SELECT ANY_VALUE(cars.make) AS anything_make, ANY_VALUE(cars.model) AS anything_model, - COUNT(_s1.car_id) AS count_car_id + COUNT(sales.car_id) AS count_car_id FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id + LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id WHERE CONTAINS(LOWER(cars.vin_number), 'm5') GROUP BY diff --git a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql index 73ade4a2b..37b05b43d 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql @@ -1,15 +1,11 @@ -WITH _s1 AS ( - SELECT - car_id - FROM main.sales -), _t0 AS ( +WITH _t0 AS ( SELECT MAX(cars.make) AS anything_make, MAX(cars.model) AS anything_model, - COUNT(_s1.car_id) AS count_car_id + COUNT(sales.car_id) AS count_car_id FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id + LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id WHERE LOWER(cars.vin_number) LIKE 
'%m5%' GROUP BY diff --git a/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql index 7520ba4e5..1126345c2 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql @@ -5,21 +5,16 @@ WITH _t2 AS ( QUALIFY NOT is_in_inventory AND ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY snapshot_date DESC NULLS FIRST) = 1 -), _s3 AS ( - SELECT - car_id, - sale_price - FROM main.sales ) SELECT ANY_VALUE(cars.make) AS make, ANY_VALUE(cars.model) AS model, - MAX(_s3.sale_price) AS highest_sale_price + MAX(sales.sale_price) AS highest_sale_price FROM main.cars AS cars JOIN _t2 AS _t2 ON _t2.car_id = cars._id -LEFT JOIN _s3 AS _s3 - ON _s3.car_id = cars._id +LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id GROUP BY cars._id ORDER BY diff --git a/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql index 4aa84c3d5..dbae4e04f 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql @@ -4,21 +4,16 @@ WITH _t AS ( is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC) AS _w FROM main.inventory_snapshots -), _s3 AS ( - SELECT - car_id, - sale_price - FROM main.sales ) SELECT ANY_VALUE(cars.make) AS make, ANY_VALUE(cars.model) AS model, - MAX(_s3.sale_price) AS highest_sale_price + MAX(sales.sale_price) AS highest_sale_price FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id -LEFT JOIN _s3 AS _s3 - ON _s3.car_id = cars._id +LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id GROUP BY cars._id ORDER BY diff --git a/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql index 7ade64db5..afd34c12e 100644 --- 
a/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql @@ -4,21 +4,16 @@ WITH _t AS ( is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY snapshot_date DESC) AS _w FROM main.inventory_snapshots -), _s3 AS ( - SELECT - car_id, - sale_price - FROM main.sales ) SELECT MAX(cars.make) AS make, MAX(cars.model) AS model, - MAX(_s3.sale_price) AS highest_sale_price + MAX(sales.sale_price) AS highest_sale_price FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id -LEFT JOIN _s3 AS _s3 - ON _s3.car_id = cars._id +LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id GROUP BY cars._id ORDER BY diff --git a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql index f23654281..10d15777f 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql @@ -5,21 +5,16 @@ WITH _t2 AS ( QUALIFY NOT is_in_inventory AND ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY snapshot_date DESC) = 1 -), _s3 AS ( - SELECT - car_id, - sale_price - FROM main.sales ) SELECT ANY_VALUE(cars.make) AS make, ANY_VALUE(cars.model) AS model, - MAX(_s3.sale_price) AS highest_sale_price + MAX(sales.sale_price) AS highest_sale_price FROM main.cars AS cars JOIN _t2 AS _t2 ON _t2.car_id = cars._id -LEFT JOIN _s3 AS _s3 - ON _s3.car_id = cars._id +LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id GROUP BY cars._id ORDER BY diff --git a/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql index 6c91d9155..0d704d83a 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql @@ -4,21 +4,16 @@ WITH _t AS ( is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY snapshot_date DESC) AS _w FROM 
main.inventory_snapshots -), _s3 AS ( - SELECT - car_id, - sale_price - FROM main.sales ) SELECT MAX(cars.make) AS make, MAX(cars.model) AS model, - MAX(_s3.sale_price) AS highest_sale_price + MAX(sales.sale_price) AS highest_sale_price FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id -LEFT JOIN _s3 AS _s3 - ON _s3.car_id = cars._id +LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id GROUP BY cars._id ORDER BY diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql index 583b1362e..e86b2495d 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_ansi.sql @@ -1,16 +1,11 @@ -WITH _s3 AS ( +WITH _t1 AS ( SELECT - patient_id, - start_dt - FROM main.treatments -), _t1 AS ( - SELECT - MIN(EXTRACT(YEAR FROM CAST(_s3.start_dt AS DATETIME))) AS min_year_start_dt + MIN(EXTRACT(YEAR FROM CAST(treatments_2.start_dt AS DATETIME))) AS min_year_start_dt FROM main.patients AS patients JOIN main.treatments AS treatments ON patients.patient_id = treatments.patient_id - LEFT JOIN _s3 AS _s3 - ON _s3.patient_id = patients.patient_id + LEFT JOIN main.treatments AS treatments_2 + ON patients.patient_id = treatments_2.patient_id GROUP BY patients.patient_id ), _t0 AS ( diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql index bb262687c..525de8128 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_mysql.sql @@ -4,19 +4,14 @@ WITH _u_0 AS ( FROM main.treatments GROUP BY 1 -), _s3 AS ( - SELECT - patient_id, - start_dt - FROM main.treatments ), _t1 AS ( SELECT - MIN(EXTRACT(YEAR FROM CAST(_s3.start_dt AS DATETIME))) AS min_year_start_dt + MIN(EXTRACT(YEAR FROM CAST(treatments.start_dt AS DATETIME))) AS min_year_start_dt FROM main.patients AS 
patients LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = patients.patient_id - LEFT JOIN _s3 AS _s3 - ON _s3.patient_id = patients.patient_id + LEFT JOIN main.treatments AS treatments + ON patients.patient_id = treatments.patient_id WHERE NOT _u_0._u_1 IS NULL GROUP BY diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql index 3658de2f2..faa66b9ef 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_postgres.sql @@ -4,19 +4,14 @@ WITH _u_0 AS ( FROM main.treatments GROUP BY 1 -), _s3 AS ( - SELECT - patient_id, - start_dt - FROM main.treatments ), _t1 AS ( SELECT - MIN(EXTRACT(YEAR FROM CAST(_s3.start_dt AS TIMESTAMP))) AS min_year_start_dt + MIN(EXTRACT(YEAR FROM CAST(treatments.start_dt AS TIMESTAMP))) AS min_year_start_dt FROM main.patients AS patients LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = patients.patient_id - LEFT JOIN _s3 AS _s3 - ON _s3.patient_id = patients.patient_id + LEFT JOIN main.treatments AS treatments + ON patients.patient_id = treatments.patient_id WHERE NOT _u_0._u_1 IS NULL GROUP BY diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql index 00510540c..046c47d1f 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_snowflake.sql @@ -4,19 +4,14 @@ WITH _u_0 AS ( FROM main.treatments GROUP BY 1 -), _s3 AS ( - SELECT - patient_id, - start_dt - FROM main.treatments ), _t1 AS ( SELECT - MIN(YEAR(CAST(_s3.start_dt AS TIMESTAMP))) AS min_year_start_dt + MIN(YEAR(CAST(treatments.start_dt AS TIMESTAMP))) AS min_year_start_dt FROM main.patients AS patients LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = patients.patient_id - LEFT JOIN _s3 AS _s3 - ON _s3.patient_id = patients.patient_id + LEFT JOIN main.treatments AS treatments + ON patients.patient_id = 
treatments.patient_id WHERE NOT _u_0._u_1 IS NULL GROUP BY diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql index fe455efe8..4fba654ce 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_sqlite.sql @@ -4,19 +4,14 @@ WITH _u_0 AS ( FROM main.treatments GROUP BY 1 -), _s3 AS ( - SELECT - patient_id, - start_dt - FROM main.treatments ), _t1 AS ( SELECT - MIN(CAST(STRFTIME('%Y', _s3.start_dt) AS INTEGER)) AS min_year_start_dt + MIN(CAST(STRFTIME('%Y', treatments.start_dt) AS INTEGER)) AS min_year_start_dt FROM main.patients AS patients LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = patients.patient_id - LEFT JOIN _s3 AS _s3 - ON _s3.patient_id = patients.patient_id + LEFT JOIN main.treatments AS treatments + ON patients.patient_id = treatments.patient_id WHERE NOT _u_0._u_1 IS NULL GROUP BY diff --git a/tests/test_sql_refsols/defog_dermtreatment_gen2_ansi.sql b/tests/test_sql_refsols/defog_dermtreatment_gen2_ansi.sql index 383878883..a2f0b75ae 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_gen2_ansi.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_gen2_ansi.sql @@ -6,20 +6,14 @@ WITH _t1 AS ( FROM main.treatments QUALIFY ROW_NUMBER() OVER (PARTITION BY doc_id ORDER BY start_dt NULLS LAST) = 1 -), _s1 AS ( - SELECT - doc_id, - start_dt, - treatment_id - FROM _t1 ) SELECT doctors.last_name, doctors.year_reg, - _s1.start_dt AS first_treatment_date, - _s1.treatment_id AS first_treatment_id + _t1.start_dt AS first_treatment_date, + _t1.treatment_id AS first_treatment_id FROM main.doctors AS doctors -LEFT JOIN _s1 AS _s1 - ON _s1.doc_id = doctors.doc_id +LEFT JOIN _t1 AS _t1 + ON _t1.doc_id = doctors.doc_id WHERE doctors.year_reg = EXTRACT(YEAR FROM DATE_SUB(CURRENT_TIMESTAMP(), 2, YEAR)) diff --git a/tests/test_sql_refsols/defog_dermtreatment_gen2_snowflake.sql 
b/tests/test_sql_refsols/defog_dermtreatment_gen2_snowflake.sql index be1f1e306..7aa9c8338 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_gen2_snowflake.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_gen2_snowflake.sql @@ -6,20 +6,14 @@ WITH _t1 AS ( FROM main.treatments QUALIFY ROW_NUMBER() OVER (PARTITION BY doc_id ORDER BY start_dt) = 1 -), _s1 AS ( - SELECT - doc_id, - start_dt, - treatment_id - FROM _t1 ) SELECT doctors.last_name, doctors.year_reg, - _s1.start_dt AS first_treatment_date, - _s1.treatment_id AS first_treatment_id + _t1.start_dt AS first_treatment_date, + _t1.treatment_id AS first_treatment_id FROM main.doctors AS doctors -LEFT JOIN _s1 AS _s1 - ON _s1.doc_id = doctors.doc_id +LEFT JOIN _t1 AS _t1 + ON _t1.doc_id = doctors.doc_id WHERE doctors.year_reg = YEAR(DATEADD(YEAR, -2, CURRENT_TIMESTAMP())) diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql index 36a527030..e7b28d656 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_ansi.sql @@ -1,15 +1,9 @@ -WITH _s1 AS ( - SELECT - amount, - coupon_id - FROM main.wallet_transactions_daily -) SELECT coupons.cid AS coupon_id, - COALESCE(SUM(_s1.amount), 0) AS total_discount + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id WHERE coupons.merchant_id = '1' GROUP BY diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql index 36a527030..e7b28d656 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_mysql.sql @@ -1,15 +1,9 @@ -WITH _s1 AS ( - SELECT - amount, - coupon_id - FROM main.wallet_transactions_daily -) 
SELECT coupons.cid AS coupon_id, - COALESCE(SUM(_s1.amount), 0) AS total_discount + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id WHERE coupons.merchant_id = '1' GROUP BY diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql index 36a527030..e7b28d656 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_postgres.sql @@ -1,15 +1,9 @@ -WITH _s1 AS ( - SELECT - amount, - coupon_id - FROM main.wallet_transactions_daily -) SELECT coupons.cid AS coupon_id, - COALESCE(SUM(_s1.amount), 0) AS total_discount + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id WHERE coupons.merchant_id = '1' GROUP BY diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql index 36a527030..e7b28d656 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql @@ -1,15 +1,9 @@ -WITH _s1 AS ( - SELECT - amount, - coupon_id - FROM main.wallet_transactions_daily -) SELECT coupons.cid AS coupon_id, - COALESCE(SUM(_s1.amount), 0) AS total_discount + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id WHERE coupons.merchant_id = '1' GROUP BY diff 
--git a/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql index 36a527030..e7b28d656 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv12_sqlite.sql @@ -1,15 +1,9 @@ -WITH _s1 AS ( - SELECT - amount, - coupon_id - FROM main.wallet_transactions_daily -) SELECT coupons.cid AS coupon_id, - COALESCE(SUM(_s1.amount), 0) AS total_discount + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id WHERE coupons.merchant_id = '1' GROUP BY diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index ef2037e72..fe8b9ea5a 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -1,17 +1,10 @@ -WITH _s1 AS ( - SELECT - amount, - coupon_id, - txid - FROM main.wallet_transactions_daily -) SELECT ANY_VALUE(coupons.code) AS coupon_code, - COUNT(_s1.txid) AS redemption_count, - COALESCE(SUM(_s1.amount), 0) AS total_discount + COUNT(wallet_transactions_daily.txid) AS redemption_count, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id GROUP BY coupons.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql index ef2037e72..fe8b9ea5a 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql @@ -1,17 +1,10 @@ -WITH _s1 AS ( - SELECT - 
amount, - coupon_id, - txid - FROM main.wallet_transactions_daily -) SELECT ANY_VALUE(coupons.code) AS coupon_code, - COUNT(_s1.txid) AS redemption_count, - COALESCE(SUM(_s1.amount), 0) AS total_discount + COUNT(wallet_transactions_daily.txid) AS redemption_count, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id GROUP BY coupons.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql index 6995be3d8..a2ac158c4 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql @@ -1,17 +1,10 @@ -WITH _s1 AS ( - SELECT - amount, - coupon_id, - txid - FROM main.wallet_transactions_daily -) SELECT MAX(coupons.code) AS coupon_code, - COUNT(_s1.txid) AS redemption_count, - COALESCE(SUM(_s1.amount), 0) AS total_discount + COUNT(wallet_transactions_daily.txid) AS redemption_count, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id GROUP BY coupons.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql index e5af28e0c..f2308b204 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql @@ -1,17 +1,10 @@ -WITH _s1 AS ( - SELECT - amount, - coupon_id, - txid - FROM main.wallet_transactions_daily -) SELECT ANY_VALUE(coupons.code) AS coupon_code, - COUNT(_s1.txid) AS redemption_count, - COALESCE(SUM(_s1.amount), 
0) AS total_discount + COUNT(wallet_transactions_daily.txid) AS redemption_count, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id GROUP BY coupons.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index ed99eb3af..c40521c08 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -1,17 +1,10 @@ -WITH _s1 AS ( - SELECT - amount, - coupon_id, - txid - FROM main.wallet_transactions_daily -) SELECT MAX(coupons.code) AS coupon_code, - COUNT(_s1.txid) AS redemption_count, - COALESCE(SUM(_s1.amount), 0) AS total_discount + COUNT(wallet_transactions_daily.txid) AS redemption_count, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id GROUP BY coupons.cid ORDER BY diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql index 969fb9cb2..64fef3152 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql @@ -1,16 +1,10 @@ -WITH _s1 AS ( - SELECT - country, - uid - FROM main.users -) SELECT - _s1.country, + users.country, COUNT(DISTINCT wallet_transactions_daily.sender_id) AS user_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_amount FROM main.wallet_transactions_daily AS wallet_transactions_daily -LEFT JOIN _s1 AS _s1 - ON _s1.uid = wallet_transactions_daily.sender_id +LEFT JOIN main.users AS users + ON users.uid = 
wallet_transactions_daily.sender_id WHERE wallet_transactions_daily.sender_type = 0 GROUP BY diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic9_mysql.sql index 969fb9cb2..64fef3152 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic9_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic9_mysql.sql @@ -1,16 +1,10 @@ -WITH _s1 AS ( - SELECT - country, - uid - FROM main.users -) SELECT - _s1.country, + users.country, COUNT(DISTINCT wallet_transactions_daily.sender_id) AS user_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_amount FROM main.wallet_transactions_daily AS wallet_transactions_daily -LEFT JOIN _s1 AS _s1 - ON _s1.uid = wallet_transactions_daily.sender_id +LEFT JOIN main.users AS users + ON users.uid = wallet_transactions_daily.sender_id WHERE wallet_transactions_daily.sender_type = 0 GROUP BY diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic9_postgres.sql index 114ba10b7..4fd98674e 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic9_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic9_postgres.sql @@ -1,16 +1,10 @@ -WITH _s1 AS ( - SELECT - country, - uid - FROM main.users -) SELECT - _s1.country, + users.country, COUNT(DISTINCT wallet_transactions_daily.sender_id) AS user_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_amount FROM main.wallet_transactions_daily AS wallet_transactions_daily -LEFT JOIN _s1 AS _s1 - ON _s1.uid = wallet_transactions_daily.sender_id +LEFT JOIN main.users AS users + ON users.uid = wallet_transactions_daily.sender_id WHERE wallet_transactions_daily.sender_type = 0 GROUP BY diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic9_snowflake.sql index 114ba10b7..4fd98674e 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic9_snowflake.sql +++ 
b/tests/test_sql_refsols/defog_ewallet_basic9_snowflake.sql @@ -1,16 +1,10 @@ -WITH _s1 AS ( - SELECT - country, - uid - FROM main.users -) SELECT - _s1.country, + users.country, COUNT(DISTINCT wallet_transactions_daily.sender_id) AS user_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_amount FROM main.wallet_transactions_daily AS wallet_transactions_daily -LEFT JOIN _s1 AS _s1 - ON _s1.uid = wallet_transactions_daily.sender_id +LEFT JOIN main.users AS users + ON users.uid = wallet_transactions_daily.sender_id WHERE wallet_transactions_daily.sender_type = 0 GROUP BY diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql index 969fb9cb2..64fef3152 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql @@ -1,16 +1,10 @@ -WITH _s1 AS ( - SELECT - country, - uid - FROM main.users -) SELECT - _s1.country, + users.country, COUNT(DISTINCT wallet_transactions_daily.sender_id) AS user_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_amount FROM main.wallet_transactions_daily AS wallet_transactions_daily -LEFT JOIN _s1 AS _s1 - ON _s1.uid = wallet_transactions_daily.sender_id +LEFT JOIN main.users AS users + ON users.uid = wallet_transactions_daily.sender_id WHERE wallet_transactions_daily.sender_type = 0 GROUP BY diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql b/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql index 1e7e1f73f..169cba686 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_ansi.sql @@ -10,23 +10,17 @@ WITH _t1 AS ( FROM _t1 GROUP BY 1 -), _s3 AS ( - SELECT - cid, - merchant_id, - start_date - FROM main.coupons ), _s4 AS ( SELECT merchants.mid, _s1.min_start_date, ANY_VALUE(merchants.created_at) AS anything_created_at, - MAX(_s3.cid) AS max_cid + MAX(coupons.cid) AS max_cid FROM main.merchants AS merchants 
LEFT JOIN _s1 AS _s1 ON _s1.merchant_id = merchants.mid - LEFT JOIN _s3 AS _s3 - ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid + LEFT JOIN main.coupons AS coupons + ON _s1.min_start_date = coupons.start_date AND coupons.merchant_id = merchants.mid GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql b/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql index 630ebae9a..be0cd0c51 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_mysql.sql @@ -10,23 +10,17 @@ WITH _t1 AS ( FROM _t1 GROUP BY 1 -), _s3 AS ( - SELECT - cid, - merchant_id, - start_date - FROM main.coupons ), _s4 AS ( SELECT merchants.mid, _s1.min_start_date, ANY_VALUE(merchants.created_at) AS anything_created_at, - MAX(_s3.cid) AS max_cid + MAX(coupons.cid) AS max_cid FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.merchant_id = merchants.mid - LEFT JOIN _s3 AS _s3 - ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid + LEFT JOIN main.coupons AS coupons + ON _s1.min_start_date = coupons.start_date AND coupons.merchant_id = merchants.mid GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql b/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql index 170ef19bd..3d5c151c9 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_postgres.sql @@ -10,23 +10,17 @@ WITH _t1 AS ( FROM _t1 GROUP BY 1 -), _s3 AS ( - SELECT - cid, - merchant_id, - start_date - FROM main.coupons ), _s4 AS ( SELECT merchants.mid, _s1.min_start_date, MAX(merchants.created_at) AS anything_created_at, - MAX(_s3.cid) AS max_cid + MAX(coupons.cid) AS max_cid FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.merchant_id = merchants.mid - LEFT JOIN _s3 AS _s3 - ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid + LEFT JOIN main.coupons AS coupons + ON 
_s1.min_start_date = coupons.start_date AND coupons.merchant_id = merchants.mid GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql index 18bae48fc..169a87861 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql @@ -10,23 +10,17 @@ WITH _t1 AS ( FROM _t1 GROUP BY 1 -), _s3 AS ( - SELECT - cid, - merchant_id, - start_date - FROM main.coupons ), _s4 AS ( SELECT merchants.mid, _s1.min_start_date, ANY_VALUE(merchants.created_at) AS anything_created_at, - MAX(_s3.cid) AS max_cid + MAX(coupons.cid) AS max_cid FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.merchant_id = merchants.mid - LEFT JOIN _s3 AS _s3 - ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid + LEFT JOIN main.coupons AS coupons + ON _s1.min_start_date = coupons.start_date AND coupons.merchant_id = merchants.mid GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql index 30b148903..605f1bcd1 100644 --- a/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_gen4_sqlite.sql @@ -10,23 +10,17 @@ WITH _t1 AS ( FROM _t1 GROUP BY 1 -), _s3 AS ( - SELECT - cid, - merchant_id, - start_date - FROM main.coupons ), _s4 AS ( SELECT merchants.mid, _s1.min_start_date, MAX(merchants.created_at) AS anything_created_at, - MAX(_s3.cid) AS max_cid + MAX(coupons.cid) AS max_cid FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.merchant_id = merchants.mid - LEFT JOIN _s3 AS _s3 - ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid + LEFT JOIN main.coupons AS coupons + ON _s1.min_start_date = coupons.start_date AND coupons.merchant_id = merchants.mid GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql 
b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql index b995e209d..594ad2d7d 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql @@ -7,22 +7,18 @@ WITH _s0 AS ( SELECT co_id FROM main.countries -), _s7 AS ( - SELECT - in_device_id - FROM main.incidents ), _t1 AS ( SELECT ANY_VALUE(_s3.co_id) AS anything__id_3, ANY_VALUE(_s2.co_id) AS anything_co_id, - COUNT(_s7.in_device_id) AS count_in_device_id + COUNT(incidents.in_device_id) AS count_in_device_id FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices ON _s2.co_id = devices.de_production_country_id AND _s3.co_id = devices.de_purchase_country_id - LEFT JOIN _s7 AS _s7 - ON _s7.in_device_id = devices.de_id + LEFT JOIN main.incidents AS incidents + ON devices.de_id = incidents.in_device_id GROUP BY devices.de_id ), _s9 AS ( diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql index fe6e8a558..1ec856c5f 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql @@ -7,22 +7,18 @@ WITH _s0 AS ( SELECT co_id FROM main.COUNTRIES -), _s7 AS ( - SELECT - in_device_id - FROM main.INCIDENTS ), _t1 AS ( SELECT ANY_VALUE(_s3.co_id) AS anything__id_3, ANY_VALUE(_s2.co_id) AS anything_co_id, - COUNT(_s7.in_device_id) AS count_in_device_id + COUNT(INCIDENTS.in_device_id) AS count_in_device_id FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.DEVICES AS DEVICES ON DEVICES.de_production_country_id = _s2.co_id AND DEVICES.de_purchase_country_id = _s3.co_id - LEFT JOIN _s7 AS _s7 - ON DEVICES.de_id = _s7.in_device_id + LEFT JOIN main.INCIDENTS AS INCIDENTS + ON DEVICES.de_id = INCIDENTS.in_device_id GROUP BY DEVICES.de_id ), _s9 AS ( diff --git 
a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql index 71a0fc511..0fc310b44 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql @@ -7,22 +7,18 @@ WITH _s0 AS ( SELECT co_id FROM main.countries -), _s7 AS ( - SELECT - in_device_id - FROM main.incidents ), _t1 AS ( SELECT MAX(_s3.co_id) AS anything__id_3, MAX(_s2.co_id) AS anything_co_id, - COUNT(_s7.in_device_id) AS count_in_device_id + COUNT(incidents.in_device_id) AS count_in_device_id FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices ON _s2.co_id = devices.de_production_country_id AND _s3.co_id = devices.de_purchase_country_id - LEFT JOIN _s7 AS _s7 - ON _s7.in_device_id = devices.de_id + LEFT JOIN main.incidents AS incidents + ON devices.de_id = incidents.in_device_id GROUP BY devices.de_id ), _s9 AS ( diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql index a39d4b1e1..e5298c497 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql @@ -7,22 +7,18 @@ WITH _s0 AS ( SELECT co_id FROM main.countries -), _s7 AS ( - SELECT - in_device_id - FROM main.incidents ), _t1 AS ( SELECT ANY_VALUE(_s3.co_id) AS anything__id_3, ANY_VALUE(_s2.co_id) AS anything_co_id, - COUNT(_s7.in_device_id) AS count_in_device_id + COUNT(incidents.in_device_id) AS count_in_device_id FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices ON _s2.co_id = devices.de_production_country_id AND _s3.co_id = devices.de_purchase_country_id - LEFT JOIN _s7 AS _s7 - ON _s7.in_device_id = devices.de_id + LEFT JOIN main.incidents AS incidents + ON devices.de_id = 
incidents.in_device_id GROUP BY devices.de_id ), _s9 AS ( diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql index ff19c8841..ce558aa04 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql @@ -7,22 +7,18 @@ WITH _s0 AS ( SELECT co_id FROM main.countries -), _s7 AS ( - SELECT - in_device_id - FROM main.incidents ), _t1 AS ( SELECT MAX(_s3.co_id) AS anything__id_3, MAX(_s2.co_id) AS anything_co_id, - COUNT(_s7.in_device_id) AS count_in_device_id + COUNT(incidents.in_device_id) AS count_in_device_id FROM _s2 AS _s2 CROSS JOIN _s2 AS _s3 JOIN main.devices AS devices ON _s2.co_id = devices.de_production_country_id AND _s3.co_id = devices.de_purchase_country_id - LEFT JOIN _s7 AS _s7 - ON _s7.in_device_id = devices.de_id + LEFT JOIN main.incidents AS incidents + ON devices.de_id = incidents.in_device_id GROUP BY devices.de_id ), _s9 AS ( diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql index 87fedf6a9..d58d6a645 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql @@ -1,16 +1,12 @@ -WITH _s3 AS ( - SELECT - in_device_id - FROM main.incidents -), _t1 AS ( +WITH _t1 AS ( SELECT ANY_VALUE(devices.de_production_country_id) AS anything_de_production_country_id, - COUNT(_s3.in_device_id) AS count_in_device_id + COUNT(incidents.in_device_id) AS count_in_device_id FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' - LEFT JOIN _s3 AS _s3 - ON _s3.in_device_id = devices.de_id + LEFT JOIN main.incidents AS incidents + 
ON devices.de_id = incidents.in_device_id GROUP BY devices.de_id ), _s5 AS ( diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql index 718d1659f..9afda834e 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql @@ -1,16 +1,12 @@ -WITH _s3 AS ( - SELECT - in_device_id - FROM main.INCIDENTS -), _t1 AS ( +WITH _t1 AS ( SELECT ANY_VALUE(DEVICES.de_production_country_id) AS anything_de_production_country_id, - COUNT(_s3.in_device_id) AS count_in_device_id + COUNT(INCIDENTS.in_device_id) AS count_in_device_id FROM main.DEVICES AS DEVICES JOIN main.PRODUCTS AS PRODUCTS ON DEVICES.de_product_id = PRODUCTS.pr_id AND PRODUCTS.pr_name = 'Sun-Set' - LEFT JOIN _s3 AS _s3 - ON DEVICES.de_id = _s3.in_device_id + LEFT JOIN main.INCIDENTS AS INCIDENTS + ON DEVICES.de_id = INCIDENTS.in_device_id GROUP BY DEVICES.de_id ), _s5 AS ( diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql index e4f683ea6..726ec8133 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql @@ -1,16 +1,12 @@ -WITH _s3 AS ( - SELECT - in_device_id - FROM main.incidents -), _t1 AS ( +WITH _t1 AS ( SELECT MAX(devices.de_production_country_id) AS anything_de_production_country_id, - COUNT(_s3.in_device_id) AS count_in_device_id + COUNT(incidents.in_device_id) AS count_in_device_id FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' - LEFT JOIN _s3 AS _s3 - ON _s3.in_device_id = devices.de_id + LEFT JOIN 
main.incidents AS incidents + ON devices.de_id = incidents.in_device_id GROUP BY devices.de_id ), _s5 AS ( diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql index ee93aa6dc..f2fbf2e37 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql @@ -1,16 +1,12 @@ -WITH _s3 AS ( - SELECT - in_device_id - FROM main.incidents -), _t1 AS ( +WITH _t1 AS ( SELECT ANY_VALUE(devices.de_production_country_id) AS anything_de_production_country_id, - COUNT(_s3.in_device_id) AS count_in_device_id + COUNT(incidents.in_device_id) AS count_in_device_id FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' - LEFT JOIN _s3 AS _s3 - ON _s3.in_device_id = devices.de_id + LEFT JOIN main.incidents AS incidents + ON devices.de_id = incidents.in_device_id GROUP BY devices.de_id ), _s5 AS ( diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql index 682b2dd3d..daad93038 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql @@ -1,16 +1,12 @@ -WITH _s3 AS ( - SELECT - in_device_id - FROM main.incidents -), _t1 AS ( +WITH _t1 AS ( SELECT MAX(devices.de_production_country_id) AS anything_de_production_country_id, - COUNT(_s3.in_device_id) AS count_in_device_id + COUNT(incidents.in_device_id) AS count_in_device_id FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' - LEFT JOIN _s3 AS _s3 - ON _s3.in_device_id = 
devices.de_id + LEFT JOIN main.incidents AS incidents + ON devices.de_id = incidents.in_device_id GROUP BY devices.de_id ), _s5 AS ( From 6a114350f3798ba4eb97084290d9ad904de3b6ba Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 17:21:17 -0400 Subject: [PATCH 117/143] Fixing bug with the COUNT left case --- .../conversion/join_aggregate_transpose.py | 72 ++++++++++++++++--- tests/test_plan_refsols/common_prefix_ag.txt | 72 ++++++++++--------- tests/test_plan_refsols/common_prefix_ah.txt | 6 +- tests/test_plan_refsols/common_prefix_ai.txt | 61 ++++++++-------- tests/test_plan_refsols/common_prefix_aj.txt | 72 ++++++++++--------- tests/test_plan_refsols/common_prefix_ak.txt | 56 ++++++++------- tests/test_plan_refsols/common_prefix_an.txt | 25 +++---- tests/test_plan_refsols/common_prefix_ao.txt | 16 ++--- tests/test_plan_refsols/common_prefix_u.txt | 4 +- tests/test_plan_refsols/common_prefix_x.txt | 4 +- tests/test_plan_refsols/common_prefix_y.txt | 19 ++--- .../epoch_intra_season_searches.txt | 2 +- tests/test_plan_refsols/singular7.txt | 23 +++--- ...chnograph_country_combination_analysis.txt | 6 +- ...nograph_country_incident_rate_analysis.txt | 6 +- ..._error_rate_sun_set_by_factory_country.txt | 21 +++--- tests/test_plan_refsols/tpch_q21.txt | 2 +- .../window_filter_order_1.txt | 2 +- .../window_filter_order_2.txt | 2 +- .../window_filter_order_3.txt | 2 +- .../window_filter_order_8.txt | 2 +- .../defog_academic_gen15_ansi.sql | 2 +- .../defog_academic_gen15_mysql.sql | 2 +- .../defog_academic_gen15_postgres.sql | 2 +- .../defog_academic_gen15_snowflake.sql | 2 +- .../defog_academic_gen15_sqlite.sql | 2 +- .../defog_academic_gen17_ansi.sql | 2 +- .../defog_academic_gen17_mysql.sql | 2 +- .../defog_academic_gen17_postgres.sql | 2 +- .../defog_academic_gen17_snowflake.sql | 2 +- .../defog_academic_gen17_sqlite.sql | 2 +- .../defog_academic_gen19_ansi.sql | 2 +- .../defog_academic_gen19_mysql.sql | 2 +- .../defog_academic_gen19_postgres.sql | 2 
+- .../defog_academic_gen19_snowflake.sql | 2 +- .../defog_academic_gen19_sqlite.sql | 2 +- .../defog_dealership_adv3_ansi.sql | 6 +- .../defog_dealership_adv3_mysql.sql | 6 +- .../defog_dealership_adv3_postgres.sql | 6 +- .../defog_dealership_adv3_snowflake.sql | 6 +- .../defog_dealership_adv3_sqlite.sql | 6 +- .../defog_dealership_adv4_ansi.sql | 11 ++- .../defog_dealership_adv4_mysql.sql | 11 ++- .../defog_dealership_adv4_postgres.sql | 11 ++- .../defog_dealership_adv4_snowflake.sql | 11 ++- .../defog_dealership_adv4_sqlite.sql | 11 ++- .../defog_ewallet_basic10_ansi.sql | 2 +- .../defog_ewallet_basic10_mysql.sql | 2 +- .../defog_ewallet_basic10_postgres.sql | 2 +- .../defog_ewallet_basic10_snowflake.sql | 2 +- .../defog_ewallet_basic10_sqlite.sql | 2 +- .../epoch_intra_season_searches_ansi.sql | 7 +- .../epoch_intra_season_searches_mysql.sql | 7 +- .../epoch_intra_season_searches_postgres.sql | 11 ++- .../epoch_intra_season_searches_snowflake.sql | 7 +- .../epoch_intra_season_searches_sqlite.sql | 7 +- ...raph_country_combination_analysis_ansi.sql | 4 +- ...aph_country_combination_analysis_mysql.sql | 4 +- ..._country_combination_analysis_postgres.sql | 4 +- ...country_combination_analysis_snowflake.sql | 4 +- ...ph_country_combination_analysis_sqlite.sql | 4 +- ...ph_country_incident_rate_analysis_ansi.sql | 4 +- ...h_country_incident_rate_analysis_mysql.sql | 4 +- ...ountry_incident_rate_analysis_postgres.sql | 4 +- ...untry_incident_rate_analysis_snowflake.sql | 4 +- ..._country_incident_rate_analysis_sqlite.sql | 7 +- ...r_rate_sun_set_by_factory_country_ansi.sql | 12 ++-- ..._rate_sun_set_by_factory_country_mysql.sql | 12 ++-- ...te_sun_set_by_factory_country_postgres.sql | 12 ++-- ...e_sun_set_by_factory_country_snowflake.sql | 12 ++-- ...rate_sun_set_by_factory_country_sqlite.sql | 12 ++-- tests/test_sql_refsols/tpch_q21_ansi.sql | 9 ++- tests/test_sql_refsols/tpch_q21_mysql.sql | 9 ++- tests/test_sql_refsols/tpch_q21_postgres.sql | 9 ++- 
tests/test_sql_refsols/tpch_q21_snowflake.sql | 9 ++- tests/test_sql_refsols/tpch_q21_sqlite.sql | 9 ++- 76 files changed, 475 insertions(+), 312 deletions(-) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index 89135e4cc..a73390196 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -14,6 +14,7 @@ Join, JoinCardinality, JoinType, + LiteralExpression, Project, RelationalExpression, RelationalNode, @@ -25,7 +26,7 @@ apply_substitution, extract_equijoin_keys, ) -from pydough.types import NumericType +from pydough.types import BooleanType, NumericType class JoinAggregateTransposeShuttle(RelationalShuttle): @@ -174,11 +175,21 @@ def join_aggregate_transpose( if not is_left: agg_key_refs, non_agg_key_refs = non_agg_key_refs, agg_key_refs + # TODO ADD COMMENTS + agg_alias: str | None = ( + join.default_input_aliases[0] if is_left else join.default_input_aliases[1] + ) + non_agg_alias: str | None = ( + join.default_input_aliases[1] if is_left else join.default_input_aliases[0] + ) + # Now that the transpose is deemed possible, if in the left join # scenario, transform any `COUNT(*)` calls into `COUNT(col)`, where # `col` is one of the aggregation keys. If this is not possible, then # abort. Also abort if any of the aggregation keys are not used as # equi-join keys. 
+ sentinel_column: RelationalExpression | None = None + existing_sentinel: str | None = None if left_join_case and any( agg.op == pydop.COUNT and len(agg.inputs) == 0 for agg in aggregate.aggregations.values() @@ -193,15 +204,39 @@ def join_aggregate_transpose( ) for agg_name, agg in aggregate.aggregations.items(): if agg.op == pydop.COUNT and len(agg.inputs) == 0: + existing_sentinel = agg_name aggregate.aggregations[agg_name] = new_call - # TODO ADD COMMENTS - agg_alias: str | None = ( - join.default_input_aliases[0] if is_left else join.default_input_aliases[1] - ) - non_agg_alias: str | None = ( - join.default_input_aliases[1] if is_left else join.default_input_aliases[0] - ) + # Similarly, insert a COUNT(*) expression as a sentinel column to use + # to know when there was no matching row from the aggregate side. + if left_join_case and any( + agg.op == pydop.COUNT for agg in aggregate.aggregations.values() + ): + sentinel_join_name: str | None = None + if existing_sentinel is not None: + for col_name, col_expr in join.columns.items(): + if ( + isinstance(col_expr, ColumnReference) + and col_expr.name == existing_sentinel + ): + sentinel_join_name = col_name + break + else: + agg_name = self.generate_name("n_rows", aggregate.columns) + aggregate.columns[agg_name] = aggregate.aggregations[agg_name] = ( + CallExpression( + pydop.COUNT, + NumericType(), + [], + ) + ) + join_name = self.generate_name("n_rows", join.columns) + join.columns[join_name] = ColumnReference( + agg_name, NumericType(), agg_alias + ) + sentinel_join_name = join_name + assert sentinel_join_name is not None + sentinel_column = ColumnReference(sentinel_join_name, NumericType()) # Identify the new cardinality of the join if the aggregate is no longer # happening before the join. 
@@ -283,6 +318,27 @@ def join_aggregate_transpose( new_aggregate_keys[agg_key.name] = lhs_join_key_agg.inputs[0] join_sub[non_agg_key] = join_sub[agg_key] + # In the left join case, transform any COUNT(col) or COUNT(*) col to + # NULL if the sentinel column is zero, indicating no matching row. + if left_join_case and sentinel_column is not None: + sentinel_cmp: RelationalExpression = CallExpression( + pydop.GRT, + BooleanType(), + [sentinel_column, LiteralExpression(0, NumericType())], + ) + + def sentinel_fn(expr: RelationalExpression) -> RelationalExpression: + return CallExpression( + pydop.KEEP_IF, expr.data_type, [expr, sentinel_cmp] + ) + + for col_name, col_expr in aggregate.aggregations.items(): + if col_expr.op == pydop.COUNT: + agg_ref_expr: ColumnReference = ColumnReference( + col_name, col_expr.data_type, agg_alias + ) + join_sub[agg_ref_expr] = sentinel_fn(join_sub[agg_ref_expr]) + # TODO ADD COMMENTS for col_name, col_expr in join.columns.items(): new_project_columns[col_name] = apply_substitution(col_expr, join_sub, {}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index c059d3ebf..91f7f8939 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', count_c_custkey), ('n_machine_high_domestic_lines', sum_count_o_orderkey), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=count_c_custkey > 0:numeric & sum_count_o_orderkey > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'count_c_custkey': count_c_custkey, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_count_o_orderkey': sum_count_o_orderkey, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': 
n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'count_c_custkey': COUNT(c_custkey), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_count_o_orderkey': SUM(count_o_orderkey), 'sum_sum_revenue': SUM(sum_revenue)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey > 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': 
SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,33 +10,35 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndis SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - 
SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey > 0:numeric), 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + 
JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 
'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 
'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index 939276643..b2a92a72f 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', sum_count_o_orderkey), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_count_o_orderkey > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_count_o_orderkey': sum_count_o_orderkey, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_count_o_orderkey': SUM(count_o_orderkey), 'sum_sum_revenue': SUM(sum_revenue)}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': 
sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_o_orderkey, count_o_orderkey > 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index 43ac4679b..f5d5cd115 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,30 +1,31 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', count_c_custkey), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) - FILTER(condition=count_c_custkey > 0:numeric, columns={'anything_n_name': anything_n_name, 'count_c_custkey': count_c_custkey, 'n_rows': ndistinct_c_custkey_0, 'sum_sum_revenue': sum_sum_revenue}) - 
AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'ndistinct_c_custkey_0': NDISTINCT(c_custkey_0), 'sum_sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == 
t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': 
o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey > 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_revenue': 
SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': 
t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 
'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index 0299a1186..d177eea2b 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', count_c_custkey), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=count_c_custkey > 0:numeric & sum_count_o_orderkey > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'count_c_custkey': count_c_custkey, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'count_c_custkey': COUNT(c_custkey), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_count_o_orderkey': SUM(count_o_orderkey), 'sum_sum_revenue': SUM(sum_revenue)}) - 
AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey > 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 
'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,33 +10,35 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndis SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, 
columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey > 0:numeric), 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, 
columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': 
t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index b10b08bfc..c1a907bad 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndistinct_c_custkey), ('n_machine_high_orders', count_c_custkey), ('n_machine_high_domestic_lines', sum_count_o_orderkey)], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=count_c_custkey > 0:numeric & sum_count_o_orderkey > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'count_c_custkey': count_c_custkey, 'ndistinct_c_custkey': ndistinct_c_custkey, 'sum_count_o_orderkey': sum_count_o_orderkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'count_c_custkey': COUNT(c_custkey), 'ndistinct_c_custkey': NDISTINCT(c_custkey), 'sum_count_o_orderkey': SUM(count_o_orderkey)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': COUNT(o_orderkey)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows)], orderings=[(anything_anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, 
columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey > 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,31 +10,33 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', ndis SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey > 0:numeric)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': 
n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 
1c04c71f6..57210930a 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', count_anything_o_custkey), ('n_no_tax_discount', anything_n_rows_0)], orderings=[(c_custkey):asc_first]) - FILTER(condition=DEFAULT_TO(sum_count_l_orderkey, 0:numeric) > 0:numeric & count_anything_o_custkey > RELAVG(args=[count_anything_o_custkey], partition=[anything_c_nationkey], order=[]) & sum_count_l_orderkey > 0:numeric, columns={'anything_n_rows_0': anything_n_rows_0, 'c_custkey': c_custkey, 'count_anything_o_custkey': count_anything_o_custkey}) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_nationkey': ANYTHING(c_nationkey), 'anything_n_rows_0': ANYTHING(n_rows_0), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_count_l_orderkey': SUM(count_l_orderkey)}) - JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'count_l_orderkey': t1.count_l_orderkey, 'n_rows_0': t0.n_rows}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey > 0:numeric), 0:numeric)), ('n_no_tax_discount', anything_n_rows_0)], orderings=[(c_custkey):asc_first]) + FILTER(condition=DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey > 0:numeric), 0:numeric) > RELAVG(args=[DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey > 0:numeric), 0:numeric)], partition=[anything_c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'anything_n_rows_0': anything_n_rows_0, 'c_custkey': c_custkey, 'count_anything_o_custkey': count_anything_o_custkey}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_nationkey': 
ANYTHING(c_nationkey), 'anything_n_rows_0': ANYTHING(n_rows_0), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'n_rows_0': t0.n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) @@ -10,11 +10,12 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', count_anything_o_custkey), ( SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - 
SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': KEEP_IF(count_l_orderkey, count_l_orderkey > 0:numeric)}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index a22099028..822795f9e 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,17 +1,17 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, 0:numeric)), ('n_no_tax_discount', count_o_custkey), ('n_part_purchases', anything_sum_count_l_orderkey)], orderings=[(c_custkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(anything_n_rows, 0:numeric) > 
RELAVG(args=[DEFAULT_TO(anything_n_rows, 0:numeric)], partition=[], order=[]) & count_o_custkey > 0:numeric, columns={'anything_n_rows': anything_n_rows, 'anything_sum_count_l_orderkey': anything_sum_count_l_orderkey, 'c_custkey': c_custkey, 'count_o_custkey': count_o_custkey}) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_n_rows': ANYTHING(n_rows), 'anything_sum_count_l_orderkey': ANYTHING(sum_count_l_orderkey), 'count_o_custkey': COUNT(o_custkey)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey, 'sum_count_l_orderkey': t0.sum_count_l_orderkey}) - LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_count_l_orderkey': sum_count_l_orderkey}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_count_l_orderkey': t1.sum_count_l_orderkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, 0:numeric)), ('n_no_tax_discount', KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric)), ('n_part_purchases', anything_sum_n_rows)], orderings=[(c_custkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(anything_n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(anything_n_rows, 0:numeric)], partition=[], order=[]) & KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric) > 0:numeric, columns={'anything_n_rows': anything_n_rows, 'anything_sum_n_rows': anything_sum_n_rows, 'c_custkey': c_custkey, 'count_o_custkey': count_o_custkey}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_n_rows': ANYTHING(n_rows), 'anything_sum_n_rows': ANYTHING(sum_n_rows), 'count_o_custkey': COUNT(o_custkey)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey, 'sum_n_rows': t0.sum_n_rows}) + LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_count_l_orderkey > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows, 'sum_count_l_orderkey': sum_count_l_orderkey}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_count_l_orderkey': SUM(count_l_orderkey)}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey > 0:numeric))}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, 
type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index 4222a39ef..c47e8b532 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -5,8 +5,8 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:n SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_count_l_orderkey > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'sum_count_l_orderkey': SUM(count_l_orderkey), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey > 0:numeric)), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey), 'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_quantity': t1.l_quantity, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': 
o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index b6c733f43..1c751d264 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=sum_count_l_orderkey > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_count_l_orderkey': SUM(count_l_orderkey)}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey > 0:numeric))}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index a296ce8e7..86ff1962f 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,10 +1,11 @@ -ROOT(columns=[('name', anything_c_name), ('n_orders', 
count_anything_o_custkey)], orderings=[(count_anything_o_custkey):desc_last, (anything_c_name):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_count_l_orderkey, 0:numeric) == 0:numeric, columns={'anything_c_name': anything_c_name, 'count_anything_o_custkey': count_anything_o_custkey}) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_count_l_orderkey': SUM(count_l_orderkey)}) - JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'count_l_orderkey': t1.count_l_orderkey}) +ROOT(columns=[('name', anything_c_name), ('n_orders', DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey > 0:numeric), 0:numeric))], orderings=[(DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey > 0:numeric), 0:numeric)):desc_last, (anything_c_name):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'anything_c_name': anything_c_name, 'count_anything_o_custkey': count_anything_o_custkey}) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) + PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': KEEP_IF(count_l_orderkey, count_l_orderkey > 0:numeric)}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index a347a0e11..4e37a72a8 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,6 +1,6 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) - 
AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(count_search_id > 0:numeric)}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(KEEP_IF(count_search_id, count_search_id > 0:numeric), 0:numeric) > 0:numeric)}) AGGREGATE(keys={'s_name': s_name, 'search_id_0': search_id_0}, aggregations={'count_search_id': COUNT(search_id)}) JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_id_0': t0.search_id}) JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 806e97eb3..abe15cf7f 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,13 +1,14 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_orders', count_l_suppkey)], orderings=[(count_l_suppkey):desc_last, (s_name):asc_first], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'count_l_suppkey': t1.count_l_suppkey, 's_name': t0.s_name}) +ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first], limit=5:numeric) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'n_orders': t1.n_orders, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 
20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(count_l_suppkey):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'count_l_suppkey': count_l_suppkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'count_l_suppkey': COUNT(l_suppkey)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'anything_p_name': anything_p_name, 'n_orders': DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey > 0:numeric), 0:numeric), 'ps_suppkey': ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey > 0:numeric), 0:numeric)):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 
'count_l_suppkey': count_l_suppkey, 'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'count_l_suppkey': COUNT(l_suppkey)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index 9404df1e1..8fb94ed79 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(DEFAULT_TO(sum_count_in_device_id, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_count_in_device_id, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) - JOIN(condition=t0.co_id == t1.anything_co_id & t0._id_1 == t1.anything__id_3, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_count_in_device_id': t1.sum_count_in_device_id}) +ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) + JOIN(condition=t0.co_id == t1.anything_co_id & t0._id_1 == t1.anything__id_3, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_count_in_device_id': SUM(count_in_device_id)}) + AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id > 0:numeric))}) AGGREGATE(keys={'de_id': de_id}, aggregations={'anything__id_3': ANYTHING(_id_3), 'anything_co_id': ANYTHING(co_id), 'count_in_device_id': COUNT(in_device_id)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t0.de_id, 'in_device_id': t1.in_device_id}) JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index 50da2a50f..c0e84a31a 100644 --- a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(sum_count_in_device_id, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.anything_us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_count_in_device_id': t1.sum_count_in_device_id, 'sum_n_rows': t0.sum_n_rows}) +ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) + JOIN(condition=t0.co_id == t1.anything_us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': 
t0.sum_n_rows}) JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) @@ -13,7 +13,7 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) - AGGREGATE(keys={'anything_us_country_id': anything_us_country_id}, aggregations={'n_rows': COUNT(), 'sum_count_in_device_id': SUM(count_in_device_id)}) + AGGREGATE(keys={'anything_us_country_id': anything_us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id > 0:numeric))}) AGGREGATE(keys={'de_id': de_id}, aggregations={'anything_us_country_id': ANYTHING(us_country_id), 'count_in_device_id': COUNT(in_device_id)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'in_device_id': t1.in_device_id, 'us_country_id': t0.us_country_id}) JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 24d59bb6e..179514721 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,11 +1,12 @@ -ROOT(columns=[('country', co_name), ('ir', 
ROUND(DEFAULT_TO(sum_count_in_device_id, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.anything_de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_count_in_device_id': t1.sum_count_in_device_id}) +ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) + JOIN(condition=t0.co_id == t1.anything_de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'anything_de_production_country_id': anything_de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_count_in_device_id': SUM(count_in_device_id)}) - AGGREGATE(keys={'de_id': de_id}, aggregations={'anything_de_production_country_id': ANYTHING(de_production_country_id), 'count_in_device_id': COUNT(in_device_id)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id, 'in_device_id': t1.in_device_id}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': 
in_device_id}) + PROJECT(columns={'anything_de_production_country_id': anything_de_production_country_id, 'n_rows': n_rows, 'sum_n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric)}) + AGGREGATE(keys={'anything_de_production_country_id': anything_de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id > 0:numeric))}) + AGGREGATE(keys={'de_id': de_id}, aggregations={'anything_de_production_country_id': ANYTHING(de_production_country_id), 'count_in_device_id': COUNT(in_device_id)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id, 'in_device_id': t1.in_device_id}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 0085817db..6ccfea2ae 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', count_anything_l_suppkey)], orderings=[(count_anything_l_suppkey):desc_last, (anything_s_name):asc_first], limit=10:numeric) +ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', DEFAULT_TO(KEEP_IF(count_anything_l_suppkey, count_anything_l_suppkey > 0:numeric), 0:numeric))], orderings=[(DEFAULT_TO(KEEP_IF(count_anything_l_suppkey, count_anything_l_suppkey > 
0:numeric), 0:numeric)):desc_last, (anything_s_name):asc_first], limit=10:numeric) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'count_anything_l_suppkey': COUNT(anything_l_suppkey)}) JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t1.anything_l_suppkey, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/window_filter_order_1.txt b/tests/test_plan_refsols/window_filter_order_1.txt index 7d3ebc83a..8d2f8da1a 100644 --- a/tests/test_plan_refsols/window_filter_order_1.txt +++ b/tests/test_plan_refsols/window_filter_order_1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=count_o_custkey < RELAVG(args=[count_o_custkey], partition=[], order=[]) & count_o_custkey > 0:numeric, columns={}) + FILTER(condition=DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric)], partition=[], order=[]) & KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric) > 0:numeric, columns={}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_2.txt 
b/tests/test_plan_refsols/window_filter_order_2.txt index 7d3ebc83a..8d2f8da1a 100644 --- a/tests/test_plan_refsols/window_filter_order_2.txt +++ b/tests/test_plan_refsols/window_filter_order_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=count_o_custkey < RELAVG(args=[count_o_custkey], partition=[], order=[]) & count_o_custkey > 0:numeric, columns={}) + FILTER(condition=DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric)], partition=[], order=[]) & KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric) > 0:numeric, columns={}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_3.txt b/tests/test_plan_refsols/window_filter_order_3.txt index 7d3ebc83a..8d2f8da1a 100644 --- a/tests/test_plan_refsols/window_filter_order_3.txt +++ b/tests/test_plan_refsols/window_filter_order_3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=count_o_custkey < RELAVG(args=[count_o_custkey], partition=[], order=[]) & count_o_custkey > 0:numeric, columns={}) + FILTER(condition=DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric)], partition=[], order=[]) & KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric) > 0:numeric, columns={}) 
AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt index 7b1314906..6b4200365 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ABSENT(count_o_custkey) & anything_c_acctbal < RELSUM(args=[count_o_custkey], partition=[], order=[]), columns={}) + FILTER(condition=ABSENT(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric)) & anything_c_acctbal < RELSUM(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric)], partition=[], order=[]), columns={}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) diff --git a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql index 7c25d6cf4..3484bab1a 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql @@ -10,7 
+10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - SUM(count_oid) / COUNT(*) AS ratio + COALESCE(SUM(CASE WHEN count_oid > 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql index 7c25d6cf4..3484bab1a 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - SUM(count_oid) / COUNT(*) AS ratio + COALESCE(SUM(CASE WHEN count_oid > 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql index 51097fc92..7113c283a 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - CAST(SUM(count_oid) AS DOUBLE PRECISION) / COUNT(*) AS ratio + CAST(COALESCE(SUM(CASE WHEN count_oid > 0 THEN count_oid ELSE NULL END), 0) AS DOUBLE PRECISION) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql index d90d873a6..89e900eeb 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - SUM(count_oid) / COUNT(*) AS ratio + COALESCE(SUM(CASE WHEN count_oid > 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql index 6dca6a500..3d8541091 100644 --- 
a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - CAST(SUM(count_oid) AS REAL) / COUNT(*) AS ratio + CAST(COALESCE(SUM(CASE WHEN count_oid > 0 THEN count_oid ELSE NULL END), 0) AS REAL) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql index 504cb2598..0e51b548c 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COUNT(publication.cid) AS count_publications + COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql index b8207083a..be0749a4a 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, - COUNT(publication.cid) AS count_publications + COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql index 4635122b3..c19d4ba96 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COUNT(publication.cid) AS count_publications 
+ COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql index a4ef4321b..930e8d9dd 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COUNT(publication.cid) AS count_publications + COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql index 492220fdb..1c431aad1 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COUNT(publication.cid) AS count_publications + COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql index 38b7fbcc1..4ee577bf8 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COUNT(publication.cid) AS num_publications + COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications FROM main.conference AS conference LEFT 
JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql index 846c76c9d..6d635c459 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, - COUNT(publication.cid) AS num_publications + COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql index 4b7b9b29b..62d45c5ff 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COUNT(publication.cid) AS num_publications + COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql index 5cfa8fc98..e98cf4862 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COUNT(publication.cid) AS num_publications + COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git 
a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql index 4605e9446..4cc815488 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COUNT(publication.cid) AS num_publications + COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql index 1ba9a3495..61bbf9abe 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT ANY_VALUE(cars.make) AS anything_make, ANY_VALUE(cars.model) AS anything_model, @@ -14,8 +14,8 @@ WITH _t0 AS ( SELECT anything_make AS make, anything_model AS model, - SUM(count_car_id) AS num_sales -FROM _t0 + COALESCE(SUM(CASE WHEN count_car_id > 0 THEN count_car_id ELSE NULL END), 0) AS num_sales +FROM _t1 GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql index 1ba9a3495..61bbf9abe 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT ANY_VALUE(cars.make) AS anything_make, ANY_VALUE(cars.model) AS anything_model, @@ -14,8 +14,8 @@ WITH _t0 AS ( SELECT anything_make AS make, anything_model AS model, - SUM(count_car_id) AS num_sales -FROM _t0 + COALESCE(SUM(CASE WHEN count_car_id > 0 THEN count_car_id ELSE NULL END), 0) AS num_sales +FROM _t1 GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql 
b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql index 37b05b43d..0ad515e29 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT MAX(cars.make) AS anything_make, MAX(cars.model) AS anything_model, @@ -14,8 +14,8 @@ WITH _t0 AS ( SELECT anything_make AS make, anything_model AS model, - SUM(count_car_id) AS num_sales -FROM _t0 + COALESCE(SUM(CASE WHEN count_car_id > 0 THEN count_car_id ELSE NULL END), 0) AS num_sales +FROM _t1 GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql index 7c13d9c3b..02d8d202f 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT ANY_VALUE(cars.make) AS anything_make, ANY_VALUE(cars.model) AS anything_model, @@ -14,8 +14,8 @@ WITH _t0 AS ( SELECT anything_make AS make, anything_model AS model, - SUM(count_car_id) AS num_sales -FROM _t0 + COALESCE(SUM(CASE WHEN count_car_id > 0 THEN count_car_id ELSE NULL END), 0) AS num_sales +FROM _t1 GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql index 37b05b43d..0ad515e29 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT MAX(cars.make) AS anything_make, MAX(cars.model) AS anything_model, @@ -14,8 +14,8 @@ WITH _t0 AS ( SELECT anything_make AS make, anything_model AS model, - SUM(count_car_id) AS num_sales -FROM _t0 + COALESCE(SUM(CASE WHEN count_car_id > 0 THEN count_car_id ELSE NULL END), 0) AS num_sales +FROM _t1 GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql 
b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql index 1d0e2975f..ddd825ef1 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql @@ -7,8 +7,15 @@ WITH _s1 AS ( sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), 30, DAY) ) SELECT - COUNT(_s1.car_id) AS num_sales, - CASE WHEN COUNT(_s1.car_id) > 0 THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL END AS total_revenue + COALESCE(CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + CASE + WHEN ( + CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 + AND NOT CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + ) + THEN COALESCE(SUM(_s1.sale_price), 0) + ELSE NULL + END AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql index 8cf17c3b8..422500123 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql @@ -7,8 +7,15 @@ WITH _s1 AS ( sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '30' DAY) ) SELECT - COUNT(_s1.car_id) AS num_sales, - CASE WHEN COUNT(_s1.car_id) > 0 THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL END AS total_revenue + COALESCE(CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + CASE + WHEN ( + CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 + AND NOT CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + ) + THEN COALESCE(SUM(_s1.sale_price), 0) + ELSE NULL + END AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql index ef1baa533..de1c96c87 100644 --- 
a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql @@ -7,8 +7,15 @@ WITH _s1 AS ( sale_date >= CURRENT_TIMESTAMP - INTERVAL '30 DAY' ) SELECT - COUNT(_s1.car_id) AS num_sales, - CASE WHEN COUNT(_s1.car_id) > 0 THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL END AS total_revenue + COALESCE(CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + CASE + WHEN ( + CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 + AND NOT CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + ) + THEN COALESCE(SUM(_s1.sale_price), 0) + ELSE NULL + END AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql index e0fbaeb7a..fb7051b88 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql @@ -7,8 +7,15 @@ WITH _s1 AS ( sale_date >= DATEADD(DAY, -30, CURRENT_TIMESTAMP()) ) SELECT - COUNT(_s1.car_id) AS num_sales, - CASE WHEN COUNT(_s1.car_id) > 0 THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL END AS total_revenue + COALESCE(CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + CASE + WHEN ( + CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 + AND NOT CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + ) + THEN COALESCE(SUM(_s1.sale_price), 0) + ELSE NULL + END AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql index f8fd3d291..6293037a0 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql +++ 
b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql @@ -7,8 +7,15 @@ WITH _s1 AS ( sale_date >= DATETIME('now', '-30 day') ) SELECT - COUNT(_s1.car_id) AS num_sales, - CASE WHEN COUNT(_s1.car_id) > 0 THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL END AS total_revenue + COALESCE(CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + CASE + WHEN ( + CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 + AND NOT CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + ) + THEN COALESCE(SUM(_s1.sale_price), 0) + ELSE NULL + END AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 9da610e5d..2b857e2b8 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(CASE WHEN COUNT(_s1.receiver_id) > 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql index f7f6babf0..2586bc980 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(CASE WHEN COUNT(_s1.receiver_id) > 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git 
a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql index 1c7b93bbe..370411bb9 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT MAX(merchants.name) AS merchant_name, - COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(CASE WHEN COUNT(_s1.receiver_id) > 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql index 3618ff61c..c91a61dd5 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(CASE WHEN COUNT(_s1.receiver_id) > 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index 43b6b1cf6..059284b65 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT MAX(merchants.name) AS merchant_name, - COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(CASE WHEN COUNT(_s1.receiver_id) > 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql 
b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql index 03c0e34d0..f8d57e2df 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql @@ -46,7 +46,12 @@ WITH _s0 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(count_search_id > 0) AS sum_is_intra_season + SUM( + ( + CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END > 0 + AND NOT CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END IS NULL + ) + ) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql index 477cbe21b..ad0a93112 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql @@ -46,7 +46,12 @@ WITH _s0 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(count_search_id > 0) AS sum_is_intra_season + SUM( + ( + CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END > 0 + AND NOT CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END IS NULL + ) + ) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql index b8fbbb36d..9bdfdc4ba 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql @@ -46,7 +46,16 @@ WITH _s0 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(CASE WHEN count_search_id > 0 THEN 1 ELSE 0 END) AS sum_is_intra_season + SUM( + CASE + WHEN ( + CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END > 0 + AND NOT CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END IS NULL + ) + THEN 1 + ELSE 0 + END + ) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql 
b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql index d43061ea8..70db4c91f 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql @@ -46,7 +46,12 @@ WITH _s0 AS ( SELECT s_name, COUNT(*) AS n_rows, - COUNT_IF(count_search_id > 0) AS sum_is_intra_season + COUNT_IF( + ( + CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END > 0 + AND NOT CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END IS NULL + ) + ) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql index 6d6e813ed..2dcfbef2f 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql @@ -48,7 +48,12 @@ WITH _s0 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM(count_search_id > 0) AS sum_is_intra_season + SUM( + ( + CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END > 0 + AND NOT CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END IS NULL + ) + ) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql index 594ad2d7d..4d1f23b91 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id + SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t1 GROUP BY 1, @@ -35,7 +35,7 @@ WITH _s0 AS ( SELECT _s0.co_name AS factory_country, _s1.co_name AS purchase_country, - ROUND(COALESCE(_s9.sum_count_in_device_id, 0) / 
COALESCE(_s9.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s9.sum_n_rows, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql index 1ec856c5f..7752beaf4 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id + SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t1 GROUP BY 1, @@ -35,7 +35,7 @@ WITH _s0 AS ( SELECT _s0.co_name AS factory_country, _s1.co_name AS purchase_country, - ROUND(COALESCE(_s9.sum_count_in_device_id, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s9.sum_n_rows, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql index 0fc310b44..fccfab68c 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id + SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t1 GROUP BY 1, @@ -36,7 +36,7 @@ SELECT _s0.co_name AS factory_country, _s1.co_name AS purchase_country, ROUND( - CAST(CAST(COALESCE(_s9.sum_count_in_device_id, 0) AS DOUBLE PRECISION) / COALESCE(_s9.n_rows, 0) AS DECIMAL), + CAST(CAST(COALESCE(_s9.sum_n_rows, 0) AS DOUBLE PRECISION) / 
COALESCE(_s9.n_rows, 0) AS DECIMAL), 2 ) AS ir FROM _s0 AS _s0 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql index e5298c497..44d390481 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id + SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t1 GROUP BY 1, @@ -35,7 +35,7 @@ WITH _s0 AS ( SELECT _s0.co_name AS factory_country, _s1.co_name AS purchase_country, - ROUND(COALESCE(_s9.sum_count_in_device_id, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s9.sum_n_rows, 0) / COALESCE(_s9.n_rows, 0), 2) AS ir FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql index ce558aa04..5701a273c 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id + SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t1 GROUP BY 1, @@ -35,7 +35,7 @@ WITH _s0 AS ( SELECT _s0.co_name AS factory_country, _s1.co_name AS purchase_country, - ROUND(CAST(COALESCE(_s9.sum_count_in_device_id, 0) AS REAL) / COALESCE(_s9.n_rows, 0), 2) AS ir + ROUND(CAST(COALESCE(_s9.sum_n_rows, 0) AS REAL) / COALESCE(_s9.n_rows, 0), 2) AS ir FROM _s0 AS _s0 CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 diff --git 
a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql index 743c3d91c..87d0883a7 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id + SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t5 GROUP BY 1 @@ -60,7 +60,7 @@ SELECT countries.co_name AS country_name, ROUND(COALESCE(_s3.sum_n_rows, 0) / _s3.n_rows, 2) AS made_ir, ROUND(COALESCE(_s7.sum_n_rows, 0) / _s7.n_rows, 2) AS sold_ir, - ROUND(COALESCE(_s13.sum_count_in_device_id, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir + ROUND(COALESCE(_s13.sum_n_rows, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir FROM main.countries AS countries JOIN _s3 AS _s3 ON _s3.de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql index 966a4ac61..1956692fa 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id + SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t5 GROUP BY 1 @@ -60,7 +60,7 @@ SELECT COUNTRIES.co_name COLLATE utf8mb4_bin AS country_name, ROUND(COALESCE(_s3.sum_n_rows, 0) / _s3.n_rows, 2) AS made_ir, ROUND(COALESCE(_s7.sum_n_rows, 0) / _s7.n_rows, 2) AS sold_ir, - ROUND(COALESCE(_s13.sum_count_in_device_id, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir + ROUND(COALESCE(_s13.sum_n_rows, 0) / 
COALESCE(_s13.n_rows, 0), 2) AS user_ir FROM main.COUNTRIES AS COUNTRIES JOIN _s3 AS _s3 ON COUNTRIES.co_id = _s3.de_production_country_id diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql index ecffcf868..65db32d9f 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id + SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t5 GROUP BY 1 @@ -67,7 +67,7 @@ SELECT 2 ) AS sold_ir, ROUND( - CAST(CAST(COALESCE(_s13.sum_count_in_device_id, 0) AS DOUBLE PRECISION) / COALESCE(_s13.n_rows, 0) AS DECIMAL), + CAST(CAST(COALESCE(_s13.sum_n_rows, 0) AS DOUBLE PRECISION) / COALESCE(_s13.n_rows, 0) AS DECIMAL), 2 ) AS user_ir FROM main.countries AS countries diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql index 0ae77254f..05e18a1a3 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id + SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t5 GROUP BY 1 @@ -60,7 +60,7 @@ SELECT countries.co_name AS country_name, ROUND(COALESCE(_s3.sum_n_rows, 0) / _s3.n_rows, 2) AS made_ir, ROUND(COALESCE(_s7.sum_n_rows, 0) / _s7.n_rows, 2) AS sold_ir, - ROUND(COALESCE(_s13.sum_count_in_device_id, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir + 
ROUND(COALESCE(_s13.sum_n_rows, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir FROM main.countries AS countries JOIN _s3 AS _s3 ON _s3.de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql index 2f2e2bc44..8354819d7 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id + SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t5 GROUP BY 1 @@ -60,10 +60,7 @@ SELECT countries.co_name AS country_name, ROUND(CAST(COALESCE(_s3.sum_n_rows, 0) AS REAL) / _s3.n_rows, 2) AS made_ir, ROUND(CAST(COALESCE(_s7.sum_n_rows, 0) AS REAL) / _s7.n_rows, 2) AS sold_ir, - ROUND( - CAST(COALESCE(_s13.sum_count_in_device_id, 0) AS REAL) / COALESCE(_s13.n_rows, 0), - 2 - ) AS user_ir + ROUND(CAST(COALESCE(_s13.sum_n_rows, 0) AS REAL) / COALESCE(_s13.n_rows, 0), 2) AS user_ir FROM main.countries AS countries JOIN _s3 AS _s3 ON _s3.de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql index d58d6a645..3073139a5 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t2 AS ( SELECT ANY_VALUE(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(incidents.in_device_id) AS count_in_device_id @@ -11,16 +11,16 @@ WITH _t1 AS ( devices.de_id ), _s5 AS ( SELECT + COALESCE(SUM(CASE WHEN count_in_device_id 
> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, anything_de_production_country_id, - COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id - FROM _t1 + COUNT(*) AS n_rows + FROM _t2 GROUP BY - 1 + 2 ) SELECT countries.co_name AS country, - ROUND(COALESCE(_s5.sum_count_in_device_id, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 ON _s5.anything_de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql index 9afda834e..def92eefd 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t2 AS ( SELECT ANY_VALUE(DEVICES.de_production_country_id) AS anything_de_production_country_id, COUNT(INCIDENTS.in_device_id) AS count_in_device_id @@ -11,16 +11,16 @@ WITH _t1 AS ( DEVICES.de_id ), _s5 AS ( SELECT + COALESCE(SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, anything_de_production_country_id, - COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id - FROM _t1 + COUNT(*) AS n_rows + FROM _t2 GROUP BY - 1 + 2 ) SELECT COUNTRIES.co_name COLLATE utf8mb4_bin AS country, - ROUND(COALESCE(_s5.sum_count_in_device_id, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.COUNTRIES AS COUNTRIES LEFT JOIN _s5 AS _s5 ON COUNTRIES.co_id = _s5.anything_de_production_country_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql 
index 726ec8133..17c8822a3 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t2 AS ( SELECT MAX(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(incidents.in_device_id) AS count_in_device_id @@ -11,17 +11,17 @@ WITH _t1 AS ( devices.de_id ), _s5 AS ( SELECT + COALESCE(SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, anything_de_production_country_id, - COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id - FROM _t1 + COUNT(*) AS n_rows + FROM _t2 GROUP BY - 1 + 2 ) SELECT countries.co_name AS country, ROUND( - CAST(CAST(COALESCE(_s5.sum_count_in_device_id, 0) AS DOUBLE PRECISION) / COALESCE(_s5.n_rows, 0) AS DECIMAL), + CAST(CAST(COALESCE(_s5.sum_n_incidents, 0) AS DOUBLE PRECISION) / COALESCE(_s5.n_rows, 0) AS DECIMAL), 2 ) AS ir FROM main.countries AS countries diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql index f2fbf2e37..139963531 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t2 AS ( SELECT ANY_VALUE(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(incidents.in_device_id) AS count_in_device_id @@ -11,16 +11,16 @@ WITH _t1 AS ( devices.de_id ), _s5 AS ( SELECT + COALESCE(SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, anything_de_production_country_id, - COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id - FROM _t1 + COUNT(*) AS n_rows + FROM _t2 GROUP BY - 1 + 2 ) 
SELECT countries.co_name AS country, - ROUND(COALESCE(_s5.sum_count_in_device_id, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 ON _s5.anything_de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql index daad93038..ea89dbc6e 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t2 AS ( SELECT MAX(devices.de_production_country_id) AS anything_de_production_country_id, COUNT(incidents.in_device_id) AS count_in_device_id @@ -11,16 +11,16 @@ WITH _t1 AS ( devices.de_id ), _s5 AS ( SELECT + COALESCE(SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, anything_de_production_country_id, - COUNT(*) AS n_rows, - SUM(count_in_device_id) AS sum_count_in_device_id - FROM _t1 + COUNT(*) AS n_rows + FROM _t2 GROUP BY - 1 + 2 ) SELECT countries.co_name AS country, - ROUND(CAST(COALESCE(_s5.sum_count_in_device_id, 0) AS REAL) / COALESCE(_s5.n_rows, 0), 2) AS ir + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / COALESCE(_s5.n_rows, 0), 2) AS ir FROM main.countries AS countries LEFT JOIN _s5 AS _s5 ON _s5.anything_de_production_country_id = countries.co_id diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index a53a1e9b5..c8ffe55d0 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -49,7 +49,14 @@ WITH _t5 AS ( ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COALESCE( + CASE + WHEN COUNT(_s13.anything_l_suppkey) > 0 + THEN 
COUNT(_s13.anything_l_suppkey) + ELSE NULL + END, + 0 + ) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/tpch_q21_mysql.sql b/tests/test_sql_refsols/tpch_q21_mysql.sql index 302c196ca..5bc071986 100644 --- a/tests/test_sql_refsols/tpch_q21_mysql.sql +++ b/tests/test_sql_refsols/tpch_q21_mysql.sql @@ -53,7 +53,14 @@ WITH _t5 AS ( ) SELECT ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COALESCE( + CASE + WHEN COUNT(_s13.anything_l_suppkey) > 0 + THEN COUNT(_s13.anything_l_suppkey) + ELSE NULL + END, + 0 + ) AS NUMWAIT FROM tpch.SUPPLIER AS SUPPLIER JOIN tpch.NATION AS NATION ON NATION.n_name = 'SAUDI ARABIA' AND NATION.n_nationkey = SUPPLIER.s_nationkey diff --git a/tests/test_sql_refsols/tpch_q21_postgres.sql b/tests/test_sql_refsols/tpch_q21_postgres.sql index 79c4527cd..841b4d29c 100644 --- a/tests/test_sql_refsols/tpch_q21_postgres.sql +++ b/tests/test_sql_refsols/tpch_q21_postgres.sql @@ -53,7 +53,14 @@ WITH _t5 AS ( ) SELECT MAX(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COALESCE( + CASE + WHEN COUNT(_s13.anything_l_suppkey) > 0 + THEN COUNT(_s13.anything_l_suppkey) + ELSE NULL + END, + 0 + ) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/tpch_q21_snowflake.sql b/tests/test_sql_refsols/tpch_q21_snowflake.sql index aa6a0445b..eca4edbcd 100644 --- a/tests/test_sql_refsols/tpch_q21_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q21_snowflake.sql @@ -53,7 +53,14 @@ WITH _t5 AS ( ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COALESCE( + CASE + WHEN COUNT(_s13.anything_l_suppkey) > 0 + THEN COUNT(_s13.anything_l_suppkey) + ELSE NULL + END, + 0 + ) AS NUMWAIT FROM 
tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/tpch_q21_sqlite.sql b/tests/test_sql_refsols/tpch_q21_sqlite.sql index c5ceb7d67..477a8ce99 100644 --- a/tests/test_sql_refsols/tpch_q21_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q21_sqlite.sql @@ -53,7 +53,14 @@ WITH _t5 AS ( ) SELECT MAX(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COALESCE( + CASE + WHEN COUNT(_s13.anything_l_suppkey) > 0 + THEN COUNT(_s13.anything_l_suppkey) + ELSE NULL + END, + 0 + ) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey From 8991a3e7c0a36339c2328b555fbfaad8ed6bda05 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 17:29:29 -0400 Subject: [PATCH 118/143] Teeing up CASE -> NULLIF rewrite --- pydough/conversion/hybrid_syncretizer.py | 2 +- pydough/conversion/hybrid_tree.py | 2 +- .../conversion/join_aggregate_transpose.py | 2 +- pydough/sqlglot/override_simplify.py | 21 +++++++++++++++++++ tests/test_plan_refsols/bad_child_reuse_1.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_4.txt | 2 +- tests/test_plan_refsols/common_prefix_af.txt | 2 +- tests/test_plan_refsols/common_prefix_ag.txt | 6 +++--- tests/test_plan_refsols/common_prefix_ah.txt | 4 ++-- tests/test_plan_refsols/common_prefix_ai.txt | 4 ++-- tests/test_plan_refsols/common_prefix_aj.txt | 6 +++--- tests/test_plan_refsols/common_prefix_ak.txt | 6 +++--- tests/test_plan_refsols/common_prefix_am.txt | 2 +- tests/test_plan_refsols/common_prefix_an.txt | 6 +++--- tests/test_plan_refsols/common_prefix_ao.txt | 8 +++---- tests/test_plan_refsols/common_prefix_i.txt | 2 +- tests/test_plan_refsols/common_prefix_o.txt | 2 +- 
tests/test_plan_refsols/common_prefix_r.txt | 2 +- tests/test_plan_refsols/common_prefix_u.txt | 4 ++-- tests/test_plan_refsols/common_prefix_x.txt | 4 ++-- tests/test_plan_refsols/common_prefix_y.txt | 4 ++-- .../epoch_intra_season_searches.txt | 2 +- ...lth_first_patient_by_coverage_type_raw.txt | 2 +- ...first_patient_by_coverage_type_rewrite.txt | 2 +- tests/test_plan_refsols/singular5.txt | 2 +- tests/test_plan_refsols/singular7.txt | 4 ++-- tests/test_plan_refsols/sqlite_udf_nested.txt | 2 +- ...chnograph_country_combination_analysis.txt | 2 +- ...nograph_country_incident_rate_analysis.txt | 2 +- ..._error_rate_sun_set_by_factory_country.txt | 2 +- tests/test_plan_refsols/tpch_q21.txt | 2 +- .../window_filter_order_1.txt | 2 +- .../window_filter_order_2.txt | 2 +- .../window_filter_order_3.txt | 2 +- .../window_filter_order_8.txt | 2 +- .../defog_academic_gen15_ansi.sql | 2 +- .../defog_academic_gen15_mysql.sql | 2 +- .../defog_academic_gen15_postgres.sql | 2 +- .../defog_academic_gen15_snowflake.sql | 2 +- .../defog_academic_gen15_sqlite.sql | 2 +- .../defog_academic_gen17_ansi.sql | 2 +- .../defog_academic_gen17_mysql.sql | 2 +- .../defog_academic_gen17_postgres.sql | 2 +- .../defog_academic_gen17_snowflake.sql | 2 +- .../defog_academic_gen17_sqlite.sql | 2 +- .../defog_academic_gen19_ansi.sql | 2 +- .../defog_academic_gen19_mysql.sql | 2 +- .../defog_academic_gen19_postgres.sql | 2 +- .../defog_academic_gen19_snowflake.sql | 2 +- .../defog_academic_gen19_sqlite.sql | 2 +- .../defog_dealership_adv3_ansi.sql | 2 +- .../defog_dealership_adv3_mysql.sql | 2 +- .../defog_dealership_adv3_postgres.sql | 2 +- .../defog_dealership_adv3_snowflake.sql | 2 +- .../defog_dealership_adv3_sqlite.sql | 2 +- .../defog_dealership_adv4_ansi.sql | 6 +++--- .../defog_dealership_adv4_mysql.sql | 6 +++--- .../defog_dealership_adv4_postgres.sql | 6 +++--- .../defog_dealership_adv4_snowflake.sql | 6 +++--- .../defog_dealership_adv4_sqlite.sql | 6 +++--- 
.../defog_dermtreatment_basic1_ansi.sql | 2 +- .../defog_dermtreatment_basic1_mysql.sql | 2 +- .../defog_dermtreatment_basic1_postgres.sql | 2 +- .../defog_dermtreatment_basic1_snowflake.sql | 2 +- .../defog_dermtreatment_basic1_sqlite.sql | 2 +- .../defog_ewallet_basic10_ansi.sql | 2 +- .../defog_ewallet_basic10_mysql.sql | 2 +- .../defog_ewallet_basic10_postgres.sql | 2 +- .../defog_ewallet_basic10_snowflake.sql | 2 +- .../defog_ewallet_basic10_sqlite.sql | 2 +- .../defog_ewallet_basic8_ansi.sql | 5 ++++- .../defog_ewallet_basic8_mysql.sql | 5 ++++- .../defog_ewallet_basic8_postgres.sql | 5 ++++- .../defog_ewallet_basic8_snowflake.sql | 5 ++++- .../defog_ewallet_basic8_sqlite.sql | 5 ++++- .../epoch_intra_season_searches_ansi.sql | 4 ++-- .../epoch_intra_season_searches_mysql.sql | 4 ++-- .../epoch_intra_season_searches_postgres.sql | 4 ++-- .../epoch_intra_season_searches_snowflake.sql | 4 ++-- .../epoch_intra_season_searches_sqlite.sql | 4 ++-- ...patient_by_coverage_type_raw_snowflake.sql | 2 +- ...ent_by_coverage_type_rewrite_snowflake.sql | 2 +- .../sqlite_udf_nested_sqlite.sql | 2 +- ...raph_country_combination_analysis_ansi.sql | 2 +- ...aph_country_combination_analysis_mysql.sql | 2 +- ..._country_combination_analysis_postgres.sql | 2 +- ...country_combination_analysis_snowflake.sql | 2 +- ...ph_country_combination_analysis_sqlite.sql | 2 +- ...ph_country_incident_rate_analysis_ansi.sql | 2 +- ...h_country_incident_rate_analysis_mysql.sql | 2 +- ...ountry_incident_rate_analysis_postgres.sql | 2 +- ...untry_incident_rate_analysis_snowflake.sql | 2 +- ..._country_incident_rate_analysis_sqlite.sql | 2 +- ...r_rate_sun_set_by_factory_country_ansi.sql | 2 +- ..._rate_sun_set_by_factory_country_mysql.sql | 2 +- ...te_sun_set_by_factory_country_postgres.sql | 2 +- ...e_sun_set_by_factory_country_snowflake.sql | 2 +- ...rate_sun_set_by_factory_country_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q21_ansi.sql | 2 +- tests/test_sql_refsols/tpch_q21_mysql.sql | 2 
+- tests/test_sql_refsols/tpch_q21_postgres.sql | 2 +- tests/test_sql_refsols/tpch_q21_snowflake.sql | 2 +- tests/test_sql_refsols/tpch_q21_sqlite.sql | 2 +- 105 files changed, 172 insertions(+), 136 deletions(-) diff --git a/pydough/conversion/hybrid_syncretizer.py b/pydough/conversion/hybrid_syncretizer.py index 0460b07e0..92966e107 100644 --- a/pydough/conversion/hybrid_syncretizer.py +++ b/pydough/conversion/hybrid_syncretizer.py @@ -224,7 +224,7 @@ def add_extension_semi_anti_count_filter( HybridFilter( tree.pipeline[-1], HybridFunctionExpr( - pydop.GRT if is_semi else pydop.EQU, + pydop.NEQ if is_semi else pydop.EQU, [agg_ref, literal_zero], BooleanType(), ), diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index 8f4306c57..2f872229c 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -323,7 +323,7 @@ def insert_count_filter(self, child_idx: int, is_semi: bool) -> None: condition: HybridExpr if is_semi: condition = HybridFunctionExpr( - pydop.GRT, + pydop.NEQ, [result_ref, HybridLiteralExpr(Literal(0, NumericType()))], BooleanType(), ) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index a73390196..5a9bc75fd 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -322,7 +322,7 @@ def join_aggregate_transpose( # NULL if the sentinel column is zero, indicating no matching row. 
if left_join_case and sentinel_column is not None: sentinel_cmp: RelationalExpression = CallExpression( - pydop.GRT, + pydop.NEQ, BooleanType(), [sentinel_column, LiteralExpression(0, NumericType())], ) diff --git a/pydough/sqlglot/override_simplify.py b/pydough/sqlglot/override_simplify.py index c1b695a17..27d838011 100644 --- a/pydough/sqlglot/override_simplify.py +++ b/pydough/sqlglot/override_simplify.py @@ -124,6 +124,9 @@ def _simplify(expression, root=True): node = simplify_concat(node) node = simplify_conditionals(node) + # PyDough Change: new pre-order transformations + node = rewrite_case_nullif(node) + if constant_propagation: node = propagate_constants(node, root) @@ -225,3 +228,21 @@ def simplify_datetrunc(expression: exp.Expression, dialect: Dialect) -> exp.Expr ) return expression + + +def rewrite_case_nullif(expr: exp.Expression) -> exp.Expression: + """ + Rewrite expressions like `CASE WHEN x != y THEN x ELSE NULL END` to + `NULLIF(x, y)` + + Args: + `expr`: The expression to rewrite. + + Returns: + The rewritten expression. 
+ """ + if not isinstance(expr, exp.Case): + return expr + + # breakpoint() + return expr diff --git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index 48954aace..8a5eb5c7d 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_rows)], orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) + FILTER(condition=n_rows != 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 133351928..cc52f1bfb 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_rows), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + FILTER(condition=n_rows != 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 133351928..cc52f1bfb 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_rows), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + FILTER(condition=n_rows != 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt b/tests/test_plan_refsols/bad_child_reuse_4.txt index ce2a967a8..d9b891dea 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_rows)], orderings=[(c_acctbal):desc_last], limit=10:numeric) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': 
c_custkey, 'n_rows': n_rows}) + FILTER(condition=n_rows != 0:numeric & DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_af.txt b/tests/test_plan_refsols/common_prefix_af.txt index 2ab722973..68ddbeb70 100644 --- a/tests/test_plan_refsols/common_prefix_af.txt +++ b/tests/test_plan_refsols/common_prefix_af.txt @@ -4,7 +4,7 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name' SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'max_c_name': max_c_name, 'n_rows': n_rows}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'c_nationkey': c_nationkey, 'max_c_name': max_c_name, 'n_rows': n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_name': MAX(c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git 
a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index 91f7f8939..2e983d6d3 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey > 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + FILTER(condition=sum_n_rows != 0:numeric & sum_sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey > 0:numeric), 'sum_revenue': sum_revenue}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey != 0:numeric), 'sum_revenue': sum_revenue}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index b2a92a72f..c12b30057 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_o_orderkey, count_o_orderkey > 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_o_orderkey, count_o_orderkey != 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index f5d5cd115..ad071af12 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey > 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) AGGREGATE(keys={'c_custkey_0': 
c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index d177eea2b..dc94f8c6d 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey > 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + FILTER(condition=sum_n_rows != 0:numeric & sum_sum_n_rows != 0:numeric, 
columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey > 0:numeric), 'sum_revenue': sum_revenue}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey != 0:numeric), 'sum_revenue': sum_revenue}) AGGREGATE(keys={'c_custkey': c_custkey, 
'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index c1a907bad..bd19864d4 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows)], orderings=[(anything_anything_n_name):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey > 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows)}) + FILTER(condition=sum_n_rows != 0:numeric & sum_sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 
'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows)}) AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey > 0:numeric)}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey != 0:numeric)}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 6af0ed73b..80a5efb02 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -6,7 +6,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numer SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) + FILTER(condition=sum_n_rows != 0:numeric & sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 57210930a..a2198f90f 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(KEEP_IF(count_anything_o_custkey, 
count_anything_o_custkey > 0:numeric), 0:numeric)), ('n_no_tax_discount', anything_n_rows_0)], orderings=[(c_custkey):asc_first]) - FILTER(condition=DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey > 0:numeric), 0:numeric) > RELAVG(args=[DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey > 0:numeric), 0:numeric)], partition=[anything_c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'anything_n_rows_0': anything_n_rows_0, 'c_custkey': c_custkey, 'count_anything_o_custkey': count_anything_o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric)), ('n_no_tax_discount', anything_n_rows_0)], orderings=[(c_custkey):asc_first]) + FILTER(condition=sum_n_rows != 0:numeric & DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric) > RELAVG(args=[DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric)], partition=[anything_c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric, columns={'anything_n_rows_0': anything_n_rows_0, 'c_custkey': c_custkey, 'count_anything_o_custkey': count_anything_o_custkey}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_nationkey': ANYTHING(c_nationkey), 'anything_n_rows_0': ANYTHING(n_rows_0), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'n_rows_0': t0.n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) @@ -10,7 +10,7 @@ 
ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(KEEP_IF(count_any SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) - PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': KEEP_IF(count_l_orderkey, count_l_orderkey > 0:numeric)}) + PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric)}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 822795f9e..48ac17aa7 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, 0:numeric)), ('n_no_tax_discount', KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric)), ('n_part_purchases', anything_sum_n_rows)], orderings=[(c_custkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(anything_n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(anything_n_rows, 0:numeric)], partition=[], order=[]) & KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric) > 0:numeric, columns={'anything_n_rows': anything_n_rows, 'anything_sum_n_rows': anything_sum_n_rows, 'c_custkey': c_custkey, 'count_o_custkey': count_o_custkey}) +ROOT(columns=[('cust_key', c_custkey), 
('n_orders', DEFAULT_TO(anything_n_rows, 0:numeric)), ('n_no_tax_discount', KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric)), ('n_part_purchases', anything_sum_n_rows)], orderings=[(c_custkey):asc_first], limit=5:numeric) + FILTER(condition=KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric) != 0:numeric & DEFAULT_TO(anything_n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(anything_n_rows, 0:numeric)], partition=[], order=[]), columns={'anything_n_rows': anything_n_rows, 'anything_sum_n_rows': anything_sum_n_rows, 'c_custkey': c_custkey, 'count_o_custkey': count_o_custkey}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_n_rows': ANYTHING(n_rows), 'anything_sum_n_rows': ANYTHING(sum_n_rows), 'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'o_custkey': t1.o_custkey, 'sum_n_rows': t0.sum_n_rows}) LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) @@ -10,8 +10,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey > 0:numeric))}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + 
AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric))}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index 273489ad8..f72b7a314 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', sum_n_rows)], orderings=[(n_rows):desc_last, (n_name):asc_first], limit=5:numeric) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt 
b/tests/test_plan_refsols/common_prefix_o.txt index 1c257b121..fe0307f7b 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -4,7 +4,7 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', D JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_sum_n_rows > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) + FILTER(condition=sum_sum_sum_n_rows != 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_sum_n_rows': t0.sum_sum_n_rows}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': 
SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice), 'sum_sum_n_rows': SUM(sum_n_rows)}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 2f9ad6dc7..1adea801e 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_l_extendedprice), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_anything_l_extendedprice': t1.max_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'max_anything_l_extendedprice': max_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'max_anything_l_extendedprice': max_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_l_extendedprice': MAX(anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_n_rows': SUM(n_rows), 'sum_o_totalprice': SUM(o_totalprice)}) JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_l_extendedprice': t1.anything_l_extendedprice, 'anything_p_name': t1.anything_p_name, 'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice}) 
FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index c47e8b532..a5309f7f7 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -5,8 +5,8 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:n SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey > 0:numeric)), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric)), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey), 'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_quantity': t1.l_quantity, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git 
a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index 1c751d264..573e4103b 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) - AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey > 0:numeric))}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) + AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric))}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index 86ff1962f..931501010 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('name', anything_c_name), ('n_orders', DEFAULT_TO(KEEP_IF(count_anything_o_custkey, 
count_anything_o_custkey > 0:numeric), 0:numeric))], orderings=[(DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey > 0:numeric), 0:numeric)):desc_last, (anything_c_name):asc_first], limit=5:numeric) +ROOT(columns=[('name', anything_c_name), ('n_orders', DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric))], orderings=[(DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric)):desc_last, (anything_c_name):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'anything_c_name': anything_c_name, 'count_anything_o_custkey': count_anything_o_custkey}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': KEEP_IF(count_l_orderkey, count_l_orderkey > 0:numeric)}) + PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric)}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt 
b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 4e37a72a8..343d44793 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,6 +1,6 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(KEEP_IF(count_search_id, count_search_id > 0:numeric), 0:numeric) > 0:numeric)}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(KEEP_IF(count_search_id, count_search_id != 0:numeric), 0:numeric) > 0:numeric)}) AGGREGATE(keys={'s_name': s_name, 'search_id_0': search_id_0}, aggregations={'count_search_id': COUNT(search_id)}) JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_id_0': t0.search_id}) JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) diff --git a/tests/test_plan_refsols/health_first_patient_by_coverage_type_raw.txt b/tests/test_plan_refsols/health_first_patient_by_coverage_type_raw.txt index 2dc68ad4f..6f8a957da 100644 --- 
a/tests/test_plan_refsols/health_first_patient_by_coverage_type_raw.txt +++ b/tests/test_plan_refsols/health_first_patient_by_coverage_type_raw.txt @@ -1,5 +1,5 @@ ROOT(columns=[('coverage_type', coverage_type), ('first_name', max_anything_unmask_first_name), ('last_name', max_anything_unmask_last_name), ('date_of_birth', max_anything_unmask_date_of_birth)], orderings=[(coverage_type):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric, columns={'coverage_type': coverage_type, 'max_anything_unmask_date_of_birth': max_anything_unmask_date_of_birth, 'max_anything_unmask_first_name': max_anything_unmask_first_name, 'max_anything_unmask_last_name': max_anything_unmask_last_name}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'coverage_type': coverage_type, 'max_anything_unmask_date_of_birth': max_anything_unmask_date_of_birth, 'max_anything_unmask_first_name': max_anything_unmask_first_name, 'max_anything_unmask_last_name': max_anything_unmask_last_name}) AGGREGATE(keys={'coverage_type': coverage_type}, aggregations={'max_anything_unmask_date_of_birth': MAX(anything_unmask_date_of_birth), 'max_anything_unmask_first_name': MAX(anything_unmask_first_name), 'max_anything_unmask_last_name': MAX(anything_unmask_last_name), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.insurance_plan_id == t1.insurance_plan_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_unmask_date_of_birth': t1.anything_unmask_date_of_birth, 'anything_unmask_first_name': t1.anything_unmask_first_name, 'anything_unmask_last_name': t1.anything_unmask_last_name, 'coverage_type': t0.coverage_type, 'n_rows': t1.n_rows}) SCAN(table=bodo.health.insurance_plans, columns={'coverage_type': coverage_type, 'insurance_plan_id': insurance_plan_id}) diff --git a/tests/test_plan_refsols/health_first_patient_by_coverage_type_rewrite.txt b/tests/test_plan_refsols/health_first_patient_by_coverage_type_rewrite.txt index 2dc68ad4f..6f8a957da 100644 --- 
a/tests/test_plan_refsols/health_first_patient_by_coverage_type_rewrite.txt +++ b/tests/test_plan_refsols/health_first_patient_by_coverage_type_rewrite.txt @@ -1,5 +1,5 @@ ROOT(columns=[('coverage_type', coverage_type), ('first_name', max_anything_unmask_first_name), ('last_name', max_anything_unmask_last_name), ('date_of_birth', max_anything_unmask_date_of_birth)], orderings=[(coverage_type):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric, columns={'coverage_type': coverage_type, 'max_anything_unmask_date_of_birth': max_anything_unmask_date_of_birth, 'max_anything_unmask_first_name': max_anything_unmask_first_name, 'max_anything_unmask_last_name': max_anything_unmask_last_name}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'coverage_type': coverage_type, 'max_anything_unmask_date_of_birth': max_anything_unmask_date_of_birth, 'max_anything_unmask_first_name': max_anything_unmask_first_name, 'max_anything_unmask_last_name': max_anything_unmask_last_name}) AGGREGATE(keys={'coverage_type': coverage_type}, aggregations={'max_anything_unmask_date_of_birth': MAX(anything_unmask_date_of_birth), 'max_anything_unmask_first_name': MAX(anything_unmask_first_name), 'max_anything_unmask_last_name': MAX(anything_unmask_last_name), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.insurance_plan_id == t1.insurance_plan_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_unmask_date_of_birth': t1.anything_unmask_date_of_birth, 'anything_unmask_first_name': t1.anything_unmask_first_name, 'anything_unmask_last_name': t1.anything_unmask_last_name, 'coverage_type': t0.coverage_type, 'n_rows': t1.n_rows}) SCAN(table=bodo.health.insurance_plans, columns={'coverage_type': coverage_type, 'insurance_plan_id': insurance_plan_id}) diff --git a/tests/test_plan_refsols/singular5.txt b/tests/test_plan_refsols/singular5.txt index 44b340fcb..5c0c74ea9 100644 --- a/tests/test_plan_refsols/singular5.txt +++ 
b/tests/test_plan_refsols/singular5.txt @@ -1,5 +1,5 @@ ROOT(columns=[('container', p_container), ('highest_price_ship_date', max_anything_l_shipdate)], orderings=[(max_anything_l_shipdate):asc_first, (p_container):asc_first], limit=5:numeric) - FILTER(condition=sum_n_rows > 0:numeric, columns={'max_anything_l_shipdate': max_anything_l_shipdate, 'p_container': p_container}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'max_anything_l_shipdate': max_anything_l_shipdate, 'p_container': p_container}) AGGREGATE(keys={'p_container': p_container}, aggregations={'max_anything_l_shipdate': MAX(anything_l_shipdate), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.p_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_l_shipdate': t1.anything_l_shipdate, 'n_rows': t1.n_rows, 'p_container': t0.p_container}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index abe15cf7f..2e54bea89 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -2,8 +2,8 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_ord JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_p_name': t1.anything_p_name, 'n_orders': t1.n_orders, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - PROJECT(columns={'anything_p_name': anything_p_name, 'n_orders': DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey > 0:numeric), 0:numeric), 'ps_suppkey': ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(KEEP_IF(count_l_suppkey, 
count_l_suppkey > 0:numeric), 0:numeric)):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'count_l_suppkey': count_l_suppkey, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'anything_p_name': anything_p_name, 'n_orders': DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey != 0:numeric), 0:numeric), 'ps_suppkey': ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey != 0:numeric), 0:numeric)):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'count_l_suppkey': count_l_suppkey, 'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'count_l_suppkey': COUNT(l_suppkey)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_nested.txt b/tests/test_plan_refsols/sqlite_udf_nested.txt index cd86522db..91814b415 100644 --- a/tests/test_plan_refsols/sqlite_udf_nested.txt +++ b/tests/test_plan_refsols/sqlite_udf_nested.txt @@ -1,6 +1,6 @@ ROOT(columns=[('p', ROUND(percentage_expr, 2:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'percentage_expr': PERCENTAGE(DECODE3(c_mktsegment, 'BUILDING':string, POSITIVE(c_acctbal), 'MACHINERY':string, EPSILON(c_acctbal, min_bal, 500:numeric), 'HOUSEHOLD':string, INTEGER(FORMAT_DATETIME('%j':string, min_o_orderdate)) == '366':string, False:bool))}) - FILTER(condition=n_rows > 0:numeric, 
columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': min_bal, 'min_o_orderdate': min_o_orderdate}) + FILTER(condition=n_rows != 0:numeric, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': min_bal, 'min_o_orderdate': min_o_orderdate}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': RELMIN(args=[c_acctbal], partition=[], order=[]), 'min_o_orderdate': min_o_orderdate, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index 8fb94ed79..5d9a3cfbd 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -3,7 +3,7 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id > 0:numeric))}) + AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id != 
0:numeric))}) AGGREGATE(keys={'de_id': de_id}, aggregations={'anything__id_3': ANYTHING(_id_3), 'anything_co_id': ANYTHING(co_id), 'count_in_device_id': COUNT(in_device_id)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t0.de_id, 'in_device_id': t1.in_device_id}) JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index c0e84a31a..16b5206d0 100644 --- a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -13,7 +13,7 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) - AGGREGATE(keys={'anything_us_country_id': anything_us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id > 0:numeric))}) + AGGREGATE(keys={'anything_us_country_id': anything_us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id != 0:numeric))}) AGGREGATE(keys={'de_id': de_id}, aggregations={'anything_us_country_id': ANYTHING(us_country_id), 'count_in_device_id': COUNT(in_device_id)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'in_device_id': 
t1.in_device_id, 'us_country_id': t0.us_country_id}) JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 179514721..a594887a4 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -2,7 +2,7 @@ ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:n JOIN(condition=t0.co_id == t1.anything_de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) PROJECT(columns={'anything_de_production_country_id': anything_de_production_country_id, 'n_rows': n_rows, 'sum_n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric)}) - AGGREGATE(keys={'anything_de_production_country_id': anything_de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id > 0:numeric))}) + AGGREGATE(keys={'anything_de_production_country_id': anything_de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id != 0:numeric))}) AGGREGATE(keys={'de_id': de_id}, aggregations={'anything_de_production_country_id': ANYTHING(de_production_country_id), 'count_in_device_id': COUNT(in_device_id)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id, 'in_device_id': t1.in_device_id}) 
JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 6ccfea2ae..d573d9ff5 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', DEFAULT_TO(KEEP_IF(count_anything_l_suppkey, count_anything_l_suppkey > 0:numeric), 0:numeric))], orderings=[(DEFAULT_TO(KEEP_IF(count_anything_l_suppkey, count_anything_l_suppkey > 0:numeric), 0:numeric)):desc_last, (anything_s_name):asc_first], limit=10:numeric) +ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', DEFAULT_TO(KEEP_IF(count_anything_l_suppkey, count_anything_l_suppkey != 0:numeric), 0:numeric))], orderings=[(DEFAULT_TO(KEEP_IF(count_anything_l_suppkey, count_anything_l_suppkey != 0:numeric), 0:numeric)):desc_last, (anything_s_name):asc_first], limit=10:numeric) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'count_anything_l_suppkey': COUNT(anything_l_suppkey)}) JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t1.anything_l_suppkey, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/window_filter_order_1.txt b/tests/test_plan_refsols/window_filter_order_1.txt index 8d2f8da1a..21d4fcb51 100644 --- a/tests/test_plan_refsols/window_filter_order_1.txt +++ b/tests/test_plan_refsols/window_filter_order_1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, 
aggregations={'n_rows': COUNT()}) - FILTER(condition=DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric)], partition=[], order=[]) & KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric) > 0:numeric, columns={}) + FILTER(condition=KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric) != 0:numeric & DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric)], partition=[], order=[]), columns={}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_2.txt b/tests/test_plan_refsols/window_filter_order_2.txt index 8d2f8da1a..21d4fcb51 100644 --- a/tests/test_plan_refsols/window_filter_order_2.txt +++ b/tests/test_plan_refsols/window_filter_order_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric)], partition=[], order=[]) & KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric) > 0:numeric, columns={}) + FILTER(condition=KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric) != 0:numeric & DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric)], 
partition=[], order=[]), columns={}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_3.txt b/tests/test_plan_refsols/window_filter_order_3.txt index 8d2f8da1a..21d4fcb51 100644 --- a/tests/test_plan_refsols/window_filter_order_3.txt +++ b/tests/test_plan_refsols/window_filter_order_3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric)], partition=[], order=[]) & KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric) > 0:numeric, columns={}) + FILTER(condition=KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric) != 0:numeric & DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric)], partition=[], order=[]), columns={}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt 
index 6b4200365..254011631 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ABSENT(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric)) & anything_c_acctbal < RELSUM(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey > 0:numeric), 0:numeric)], partition=[], order=[]), columns={}) + FILTER(condition=ABSENT(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric)) & anything_c_acctbal < RELSUM(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric)], partition=[], order=[]), columns={}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) diff --git a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql index 3484bab1a..ac6ecaa8b 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - COALESCE(SUM(CASE WHEN count_oid > 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio + COALESCE(SUM(CASE WHEN count_oid <> 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql index 3484bab1a..ac6ecaa8b 100644 --- 
a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - COALESCE(SUM(CASE WHEN count_oid > 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio + COALESCE(SUM(CASE WHEN count_oid <> 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql index 7113c283a..de72ee868 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - CAST(COALESCE(SUM(CASE WHEN count_oid > 0 THEN count_oid ELSE NULL END), 0) AS DOUBLE PRECISION) / COUNT(*) AS ratio + CAST(COALESCE(SUM(CASE WHEN count_oid <> 0 THEN count_oid ELSE NULL END), 0) AS DOUBLE PRECISION) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql index 89e900eeb..e18408503 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - COALESCE(SUM(CASE WHEN count_oid > 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio + COALESCE(SUM(CASE WHEN count_oid <> 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql index 3d8541091..a5e24526b 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - CAST(COALESCE(SUM(CASE WHEN count_oid > 0 THEN 
count_oid ELSE NULL END), 0) AS REAL) / COUNT(*) AS ratio + CAST(COALESCE(SUM(CASE WHEN count_oid <> 0 THEN count_oid ELSE NULL END), 0) AS REAL) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql index 0e51b548c..286c4a7e3 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications + COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql index be0749a4a..cf49f6bca 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, - COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications + COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql index c19d4ba96..5548d7996 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 
0) AS count_publications + COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql index 930e8d9dd..25eb987d3 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications + COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql index 1c431aad1..54502b0af 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications + COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql index 4ee577bf8..b8a7f31c7 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(CASE WHEN 
COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications + COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql index 6d635c459..866d80622 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, - COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications + COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql index 62d45c5ff..244567dc4 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications + COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql index e98cf4862..3b3b803af 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql @@ 
-1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications + COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql index 4cc815488..110473596 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) > 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications + COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql index 61bbf9abe..2e56fb6cf 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(CASE WHEN count_car_id > 0 THEN count_car_id ELSE NULL END), 0) AS num_sales + COALESCE(SUM(CASE WHEN count_car_id <> 0 THEN count_car_id ELSE NULL END), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql index 61bbf9abe..2e56fb6cf 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, 
anything_model AS model, - COALESCE(SUM(CASE WHEN count_car_id > 0 THEN count_car_id ELSE NULL END), 0) AS num_sales + COALESCE(SUM(CASE WHEN count_car_id <> 0 THEN count_car_id ELSE NULL END), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql index 0ad515e29..cda148aab 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(CASE WHEN count_car_id > 0 THEN count_car_id ELSE NULL END), 0) AS num_sales + COALESCE(SUM(CASE WHEN count_car_id <> 0 THEN count_car_id ELSE NULL END), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql index 02d8d202f..954ae6bad 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(CASE WHEN count_car_id > 0 THEN count_car_id ELSE NULL END), 0) AS num_sales + COALESCE(SUM(CASE WHEN count_car_id <> 0 THEN count_car_id ELSE NULL END), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql index 0ad515e29..cda148aab 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(CASE WHEN count_car_id > 0 THEN count_car_id ELSE NULL END), 0) AS num_sales + COALESCE(SUM(CASE WHEN count_car_id <> 0 THEN count_car_id ELSE NULL END), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git 
a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql index ddd825ef1..c4dc34942 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql @@ -7,11 +7,11 @@ WITH _s1 AS ( sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), 30, DAY) ) SELECT - COALESCE(CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + COALESCE(CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, CASE WHEN ( - CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 - AND NOT CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 + AND NOT CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL ) THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL diff --git a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql index 422500123..b4e0ae20c 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql @@ -7,11 +7,11 @@ WITH _s1 AS ( sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '30' DAY) ) SELECT - COALESCE(CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + COALESCE(CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, CASE WHEN ( - CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 - AND NOT CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 + AND NOT CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL ) THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL diff --git a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql 
b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql index de1c96c87..0bc1b1044 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql @@ -7,11 +7,11 @@ WITH _s1 AS ( sale_date >= CURRENT_TIMESTAMP - INTERVAL '30 DAY' ) SELECT - COALESCE(CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + COALESCE(CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, CASE WHEN ( - CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 - AND NOT CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 + AND NOT CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL ) THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL diff --git a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql index fb7051b88..3c433feb5 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql @@ -7,11 +7,11 @@ WITH _s1 AS ( sale_date >= DATEADD(DAY, -30, CURRENT_TIMESTAMP()) ) SELECT - COALESCE(CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + COALESCE(CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, CASE WHEN ( - CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 - AND NOT CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 + AND NOT CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL ) THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL diff --git a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql 
b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql index 6293037a0..55c0bbf21 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql @@ -7,11 +7,11 @@ WITH _s1 AS ( sale_date >= DATETIME('now', '-30 day') ) SELECT - COALESCE(CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + COALESCE(CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, CASE WHEN ( - CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 - AND NOT CASE WHEN COUNT(_s1.car_id) > 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 + AND NOT CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL ) THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic1_ansi.sql b/tests/test_sql_refsols/defog_dermtreatment_basic1_ansi.sql index 36995e1d9..8e3e0d64f 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic1_ansi.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic1_ansi.sql @@ -25,7 +25,7 @@ SELECT COALESCE(sum_sum_tot_drug_amt, 0) AS total_drug_amount FROM _t1 WHERE - sum_n_rows > 0 + sum_n_rows <> 0 ORDER BY 3 DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic1_mysql.sql b/tests/test_sql_refsols/defog_dermtreatment_basic1_mysql.sql index 32b7d7fe8..66b68eab6 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic1_mysql.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic1_mysql.sql @@ -31,7 +31,7 @@ SELECT COALESCE(sum_sum_tot_drug_amt, 0) AS total_drug_amount FROM _t1 WHERE - sum_n_rows > 0 + sum_n_rows <> 0 ORDER BY 3 DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic1_postgres.sql b/tests/test_sql_refsols/defog_dermtreatment_basic1_postgres.sql index 61f47f4bc..4fac57f02 100644 --- 
a/tests/test_sql_refsols/defog_dermtreatment_basic1_postgres.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic1_postgres.sql @@ -31,7 +31,7 @@ SELECT COALESCE(sum_sum_tot_drug_amt, 0) AS total_drug_amount FROM _t1 WHERE - sum_n_rows > 0 + sum_n_rows <> 0 ORDER BY 3 DESC NULLS LAST LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic1_snowflake.sql b/tests/test_sql_refsols/defog_dermtreatment_basic1_snowflake.sql index cda7df693..30ff60d4a 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic1_snowflake.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic1_snowflake.sql @@ -25,7 +25,7 @@ SELECT COALESCE(sum_sum_tot_drug_amt, 0) AS total_drug_amount FROM _t1 WHERE - sum_n_rows > 0 + sum_n_rows <> 0 ORDER BY 3 DESC NULLS LAST LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic1_sqlite.sql b/tests/test_sql_refsols/defog_dermtreatment_basic1_sqlite.sql index e2b483a5d..66fdbec77 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic1_sqlite.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic1_sqlite.sql @@ -29,7 +29,7 @@ SELECT COALESCE(sum_sum_tot_drug_amt, 0) AS total_drug_amount FROM _t1 WHERE - sum_n_rows > 0 + sum_n_rows <> 0 ORDER BY 3 DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 2b857e2b8..4c667c939 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COALESCE(CASE WHEN COUNT(_s1.receiver_id) > 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, + COALESCE(CASE WHEN COUNT(_s1.receiver_id) <> 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git 
a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql index 2586bc980..2e5188339 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COALESCE(CASE WHEN COUNT(_s1.receiver_id) > 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, + COALESCE(CASE WHEN COUNT(_s1.receiver_id) <> 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql index 370411bb9..60518b318 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT MAX(merchants.name) AS merchant_name, - COALESCE(CASE WHEN COUNT(_s1.receiver_id) > 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, + COALESCE(CASE WHEN COUNT(_s1.receiver_id) <> 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql index c91a61dd5..2d71bf033 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COALESCE(CASE WHEN COUNT(_s1.receiver_id) > 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, + COALESCE(CASE WHEN COUNT(_s1.receiver_id) <> 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS 
total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index 059284b65..2226c08c1 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT MAX(merchants.name) AS merchant_name, - COALESCE(CASE WHEN COUNT(_s1.receiver_id) > 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, + COALESCE(CASE WHEN COUNT(_s1.receiver_id) <> 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index fe8b9ea5a..9519f26b9 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -1,6 +1,9 @@ SELECT ANY_VALUE(coupons.code) AS coupon_code, - COUNT(wallet_transactions_daily.txid) AS redemption_count, + COALESCE( + CASE WHEN COUNT(*) <> 0 THEN COUNT(wallet_transactions_daily.txid) ELSE NULL END, + 0 + ) AS redemption_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql index fe8b9ea5a..9519f26b9 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql @@ -1,6 +1,9 @@ SELECT ANY_VALUE(coupons.code) AS coupon_code, - COUNT(wallet_transactions_daily.txid) AS redemption_count, + COALESCE( + CASE WHEN COUNT(*) <> 0 THEN COUNT(wallet_transactions_daily.txid) ELSE NULL 
END, + 0 + ) AS redemption_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql index a2ac158c4..456e768b8 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql @@ -1,6 +1,9 @@ SELECT MAX(coupons.code) AS coupon_code, - COUNT(wallet_transactions_daily.txid) AS redemption_count, + COALESCE( + CASE WHEN COUNT(*) <> 0 THEN COUNT(wallet_transactions_daily.txid) ELSE NULL END, + 0 + ) AS redemption_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql index f2308b204..2d38be644 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql @@ -1,6 +1,9 @@ SELECT ANY_VALUE(coupons.code) AS coupon_code, - COUNT(wallet_transactions_daily.txid) AS redemption_count, + COALESCE( + CASE WHEN COUNT(*) <> 0 THEN COUNT(wallet_transactions_daily.txid) ELSE NULL END, + 0 + ) AS redemption_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index c40521c08..0142da0d8 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -1,6 +1,9 @@ SELECT MAX(coupons.code) AS coupon_code, - COUNT(wallet_transactions_daily.txid) AS 
redemption_count, + COALESCE( + CASE WHEN COUNT(*) <> 0 THEN COUNT(wallet_transactions_daily.txid) ELSE NULL END, + 0 + ) AS redemption_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql index f8d57e2df..7c3fa09e1 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql @@ -48,8 +48,8 @@ WITH _s0 AS ( COUNT(*) AS n_rows, SUM( ( - CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END > 0 - AND NOT CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END IS NULL + CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END > 0 + AND NOT CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END IS NULL ) ) AS sum_is_intra_season FROM _t1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql index ad0a93112..6a16f47ab 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql @@ -48,8 +48,8 @@ WITH _s0 AS ( COUNT(*) AS n_rows, SUM( ( - CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END > 0 - AND NOT CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END IS NULL + CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END > 0 + AND NOT CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END IS NULL ) ) AS sum_is_intra_season FROM _t1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql index 9bdfdc4ba..446f0aa62 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql +++ 
b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql @@ -49,8 +49,8 @@ WITH _s0 AS ( SUM( CASE WHEN ( - CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END > 0 - AND NOT CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END IS NULL + CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END > 0 + AND NOT CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END IS NULL ) THEN 1 ELSE 0 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql index 70db4c91f..6195f1e37 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql @@ -48,8 +48,8 @@ WITH _s0 AS ( COUNT(*) AS n_rows, COUNT_IF( ( - CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END > 0 - AND NOT CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END IS NULL + CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END > 0 + AND NOT CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END IS NULL ) ) AS sum_is_intra_season FROM _t1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql index 2dcfbef2f..204d8d3bd 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql @@ -50,8 +50,8 @@ WITH _s0 AS ( COUNT(*) AS n_rows, SUM( ( - CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END > 0 - AND NOT CASE WHEN count_search_id > 0 THEN count_search_id ELSE NULL END IS NULL + CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END > 0 + AND NOT CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END IS NULL ) ) AS sum_is_intra_season FROM _t1 diff --git a/tests/test_sql_refsols/health_first_patient_by_coverage_type_raw_snowflake.sql 
b/tests/test_sql_refsols/health_first_patient_by_coverage_type_raw_snowflake.sql index 315a54897..9d2ce7b2f 100644 --- a/tests/test_sql_refsols/health_first_patient_by_coverage_type_raw_snowflake.sql +++ b/tests/test_sql_refsols/health_first_patient_by_coverage_type_raw_snowflake.sql @@ -39,6 +39,6 @@ SELECT max_anything_unmask_date_of_birth AS date_of_birth FROM _t0 WHERE - sum_n_rows > 0 + sum_n_rows <> 0 ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/health_first_patient_by_coverage_type_rewrite_snowflake.sql b/tests/test_sql_refsols/health_first_patient_by_coverage_type_rewrite_snowflake.sql index 315a54897..9d2ce7b2f 100644 --- a/tests/test_sql_refsols/health_first_patient_by_coverage_type_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/health_first_patient_by_coverage_type_rewrite_snowflake.sql @@ -39,6 +39,6 @@ SELECT max_anything_unmask_date_of_birth AS date_of_birth FROM _t0 WHERE - sum_n_rows > 0 + sum_n_rows <> 0 ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql index d6d15ac93..6c5a99b67 100644 --- a/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql @@ -39,4 +39,4 @@ SELECT ) AS p FROM _t2 WHERE - n_rows > 0 + n_rows <> 0 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql index 4d1f23b91..58fd231a6 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git 
a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql index 7752beaf4..dc13b2fb2 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql index fccfab68c..209791276 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql index 44d390481..09346adb4 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git 
a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql index 5701a273c..a40016e67 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql index 87d0883a7..0bc9bd4b2 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql index 1956692fa..8aeb57830 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git 
a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql index 65db32d9f..5e3d70538 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql index 05e18a1a3..9711eaa46 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql index 8354819d7..503174b3c 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git 
a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql index 3073139a5..7c785df60 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, + COALESCE(SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql index def92eefd..8a3d4d7b2 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( DEVICES.de_id ), _s5 AS ( SELECT - COALESCE(SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, + COALESCE(SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql index 17c8822a3..8a33e5c9f 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(CASE WHEN count_in_device_id > 0 THEN 
count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, + COALESCE(SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql index 139963531..9f149a6f1 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, + COALESCE(SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql index ea89dbc6e..692772d12 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(CASE WHEN count_in_device_id > 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, + COALESCE(SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index c8ffe55d0..7cea9ff0c 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -51,7 +51,7 @@ SELECT 
ANY_VALUE(supplier.s_name) AS S_NAME, COALESCE( CASE - WHEN COUNT(_s13.anything_l_suppkey) > 0 + WHEN COUNT(_s13.anything_l_suppkey) <> 0 THEN COUNT(_s13.anything_l_suppkey) ELSE NULL END, diff --git a/tests/test_sql_refsols/tpch_q21_mysql.sql b/tests/test_sql_refsols/tpch_q21_mysql.sql index 5bc071986..77437cd1a 100644 --- a/tests/test_sql_refsols/tpch_q21_mysql.sql +++ b/tests/test_sql_refsols/tpch_q21_mysql.sql @@ -55,7 +55,7 @@ SELECT ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, COALESCE( CASE - WHEN COUNT(_s13.anything_l_suppkey) > 0 + WHEN COUNT(_s13.anything_l_suppkey) <> 0 THEN COUNT(_s13.anything_l_suppkey) ELSE NULL END, diff --git a/tests/test_sql_refsols/tpch_q21_postgres.sql b/tests/test_sql_refsols/tpch_q21_postgres.sql index 841b4d29c..fe5d9711c 100644 --- a/tests/test_sql_refsols/tpch_q21_postgres.sql +++ b/tests/test_sql_refsols/tpch_q21_postgres.sql @@ -55,7 +55,7 @@ SELECT MAX(supplier.s_name) AS S_NAME, COALESCE( CASE - WHEN COUNT(_s13.anything_l_suppkey) > 0 + WHEN COUNT(_s13.anything_l_suppkey) <> 0 THEN COUNT(_s13.anything_l_suppkey) ELSE NULL END, diff --git a/tests/test_sql_refsols/tpch_q21_snowflake.sql b/tests/test_sql_refsols/tpch_q21_snowflake.sql index eca4edbcd..b30608553 100644 --- a/tests/test_sql_refsols/tpch_q21_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q21_snowflake.sql @@ -55,7 +55,7 @@ SELECT ANY_VALUE(supplier.s_name) AS S_NAME, COALESCE( CASE - WHEN COUNT(_s13.anything_l_suppkey) > 0 + WHEN COUNT(_s13.anything_l_suppkey) <> 0 THEN COUNT(_s13.anything_l_suppkey) ELSE NULL END, diff --git a/tests/test_sql_refsols/tpch_q21_sqlite.sql b/tests/test_sql_refsols/tpch_q21_sqlite.sql index 477a8ce99..2a8b65cd4 100644 --- a/tests/test_sql_refsols/tpch_q21_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q21_sqlite.sql @@ -55,7 +55,7 @@ SELECT MAX(supplier.s_name) AS S_NAME, COALESCE( CASE - WHEN COUNT(_s13.anything_l_suppkey) > 0 + WHEN COUNT(_s13.anything_l_suppkey) <> 0 THEN COUNT(_s13.anything_l_suppkey) ELSE NULL 
END, From eb17fe6e4afaf1d5e9d7421e88e1ee9c34bbbcfd Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 17:42:59 -0400 Subject: [PATCH 119/143] Coalesce nullif rewrite WIP --- pydough/sqlglot/override_simplify.py | 72 ++++++++++++++++++- .../agg_simplification_1_mysql.sql | 64 ++++++++--------- .../agg_simplification_1_sqlite.sql | 50 ++++++------- .../defog_academic_gen15_ansi.sql | 2 +- .../defog_academic_gen15_mysql.sql | 2 +- .../defog_academic_gen15_postgres.sql | 2 +- .../defog_academic_gen15_snowflake.sql | 2 +- .../defog_academic_gen15_sqlite.sql | 2 +- .../defog_academic_gen17_ansi.sql | 2 +- .../defog_academic_gen17_mysql.sql | 2 +- .../defog_academic_gen17_postgres.sql | 2 +- .../defog_academic_gen17_snowflake.sql | 2 +- .../defog_academic_gen17_sqlite.sql | 2 +- .../defog_academic_gen19_ansi.sql | 2 +- .../defog_academic_gen19_mysql.sql | 2 +- .../defog_academic_gen19_postgres.sql | 2 +- .../defog_academic_gen19_snowflake.sql | 2 +- .../defog_academic_gen19_sqlite.sql | 2 +- .../defog_dealership_adv3_ansi.sql | 2 +- .../defog_dealership_adv3_mysql.sql | 2 +- .../defog_dealership_adv3_postgres.sql | 2 +- .../defog_dealership_adv3_snowflake.sql | 2 +- .../defog_dealership_adv3_sqlite.sql | 2 +- .../defog_dealership_adv4_ansi.sql | 5 +- .../defog_dealership_adv4_mysql.sql | 5 +- .../defog_dealership_adv4_postgres.sql | 5 +- .../defog_dealership_adv4_snowflake.sql | 5 +- .../defog_dealership_adv4_sqlite.sql | 5 +- .../defog_ewallet_basic10_ansi.sql | 2 +- .../defog_ewallet_basic10_mysql.sql | 2 +- .../defog_ewallet_basic10_postgres.sql | 2 +- .../defog_ewallet_basic10_snowflake.sql | 2 +- .../defog_ewallet_basic10_sqlite.sql | 2 +- .../epoch_intra_season_searches_ansi.sql | 9 +-- .../epoch_intra_season_searches_mysql.sql | 9 +-- .../epoch_intra_season_searches_postgres.sql | 3 +- .../epoch_intra_season_searches_snowflake.sql | 9 +-- .../epoch_intra_season_searches_sqlite.sql | 9 +-- .../sqlite_udf_combine_strings_sqlite.sql | 2 +- 
...raph_country_combination_analysis_ansi.sql | 2 +- ...aph_country_combination_analysis_mysql.sql | 2 +- ..._country_combination_analysis_postgres.sql | 2 +- ...country_combination_analysis_snowflake.sql | 2 +- ...ph_country_combination_analysis_sqlite.sql | 2 +- ...ph_country_incident_rate_analysis_ansi.sql | 2 +- ...h_country_incident_rate_analysis_mysql.sql | 2 +- ...ountry_incident_rate_analysis_postgres.sql | 2 +- ...untry_incident_rate_analysis_snowflake.sql | 2 +- ..._country_incident_rate_analysis_sqlite.sql | 2 +- ...r_rate_sun_set_by_factory_country_ansi.sql | 2 +- ..._rate_sun_set_by_factory_country_mysql.sql | 2 +- ...te_sun_set_by_factory_country_postgres.sql | 2 +- ...e_sun_set_by_factory_country_snowflake.sql | 2 +- ...rate_sun_set_by_factory_country_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q21_ansi.sql | 9 +-- tests/test_sql_refsols/tpch_q21_mysql.sql | 9 +-- tests/test_sql_refsols/tpch_q21_postgres.sql | 9 +-- tests/test_sql_refsols/tpch_q21_snowflake.sql | 9 +-- tests/test_sql_refsols/tpch_q21_sqlite.sql | 9 +-- 59 files changed, 191 insertions(+), 186 deletions(-) diff --git a/pydough/sqlglot/override_simplify.py b/pydough/sqlglot/override_simplify.py index 27d838011..53ee4cc12 100644 --- a/pydough/sqlglot/override_simplify.py +++ b/pydough/sqlglot/override_simplify.py @@ -26,6 +26,7 @@ extract_date, extract_type, flatten, + is_null, logger, propagate_constants, remove_complements, @@ -125,7 +126,8 @@ def _simplify(expression, root=True): node = simplify_conditionals(node) # PyDough Change: new pre-order transformations - node = rewrite_case_nullif(node) + node = rewrite_case_to_nullif(node) + node = rewrite_coalesce_nullif(node) if constant_propagation: node = propagate_constants(node, root) @@ -230,7 +232,7 @@ def simplify_datetrunc(expression: exp.Expression, dialect: Dialect) -> exp.Expr return expression -def rewrite_case_nullif(expr: exp.Expression) -> exp.Expression: +def rewrite_case_to_nullif(expr: exp.Expression) -> 
exp.Expression: """ Rewrite expressions like `CASE WHEN x != y THEN x ELSE NULL END` to `NULLIF(x, y)` @@ -244,5 +246,69 @@ def rewrite_case_nullif(expr: exp.Expression) -> exp.Expression: if not isinstance(expr, exp.Case): return expr - # breakpoint() + if ( + not (expr.args.get("this") is None and is_null(expr.args.get("default", None))) + and len(expr.args.get("ifs", [])) == 1 + ): + return expr + + if_expr = expr.args["ifs"][0] + condition = if_expr.args.get("this") + result = if_expr.args.get("true") + + if not isinstance(condition, exp.NEQ): + return expr + + lhs = condition.args.get("this") + rhs = condition.args.get("expression") + + if lhs == result: + return exp.Nullif(this=lhs, expression=rhs, copy=False) + + if rhs == result: + return exp.Nullif(this=rhs, expression=lhs, copy=False) + return expr + + +def rewrite_coalesce_nullif(expr: exp.Expression) -> exp.Expression: + """ + Rewrite expressions like `COALESCE(NULLIF(x, y), z)` to + `CASE WHEN x = y THEN z ELSE x END`, or if `y` and `z` are the same then + just to `x`. + + Args: + `expr`: The expression to rewrite. + + Returns: + The rewritten expression. 
+ """ + if not isinstance(expr, exp.Coalesce): + return expr + + if len(expr.expressions) != 1 or expr.args.get("is_nvl"): + return expr + + first = expr.this + second = expr.expressions[0] + + if not isinstance(first, exp.Nullif): + return expr + + lhs = first.args.get("this") + rhs = first.args.get("expression") + + if rhs == second: + return lhs + + return exp.Case( + whens=[ + exp.When( + this=exp.EQ(this=lhs, expression=rhs, copy=False), + true=second, + copy=False, + ) + ], + default=lhs, + copy=False, + ) diff --git a/tests/test_sql_refsols/agg_simplification_1_mysql.sql b/tests/test_sql_refsols/agg_simplification_1_mysql.sql index 5667ce930..d24493ae3 100644 --- a/tests/test_sql_refsols/agg_simplification_1_mysql.sql +++ b/tests/test_sql_refsols/agg_simplification_1_mysql.sql @@ -4,10 +4,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( ( - COUNT(1) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(1) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) / 2.0 ) ) < 1.0 @@ -17,10 +17,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( ( - COUNT(2) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(2) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) / 2.0 ) ) < 1.0 @@ -30,10 +30,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN 
sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( ( - COUNT(-1) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(-1) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) / 2.0 ) ) < 1.0 @@ -43,10 +43,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( ( - COUNT(-3) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(-3) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) / 2.0 ) ) < 1.0 @@ -56,10 +56,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( ( - COUNT(0) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(0) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) / 2.0 ) ) < 1.0 @@ -69,10 +69,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( ( - COUNT(0.5) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(0.5) OVER (PARTITION BY 
CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) / 2.0 ) ) < 1.0 @@ -82,74 +82,70 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) DESC) - 1.0 + ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) DESC) - 1.0 ) - ( ( - COUNT( - CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) - ) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) / 2.0 ) ) < 1.0 - THEN CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + THEN CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ELSE NULL END AS expr_79, CASE WHEN TRUNCATE( - CAST(0.9 * COUNT(1) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS FLOAT), + CAST(0.9 * COUNT(1) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN 1 ELSE NULL END AS expr_80, CASE WHEN TRUNCATE( - CAST(0.8 * COUNT(2) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS FLOAT), + CAST(0.8 * COUNT(2) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' 
THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN 2 ELSE NULL END AS expr_81, CASE WHEN TRUNCATE( - CAST(0.7 * COUNT(-1) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS FLOAT), + CAST(0.7 * COUNT(-1) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN -1 ELSE NULL END AS expr_82, CASE WHEN TRUNCATE( - CAST(0.6 * COUNT(-3) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS FLOAT), + CAST(0.6 * COUNT(-3) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN -3 ELSE NULL END AS expr_83, CASE WHEN TRUNCATE( - CAST(0.5 * COUNT(0) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS FLOAT), + CAST(0.5 * COUNT(0) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN 0 ELSE NULL END AS expr_84, CASE WHEN TRUNCATE( - CAST(0.4 * COUNT(0.5) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS 
FLOAT), + CAST(0.4 * COUNT(0.5) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN 0.5 ELSE NULL END AS expr_85, CASE WHEN TRUNCATE( - CAST(0.19999999999999996 * COUNT( - CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) - ) OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS FLOAT), + CAST(0.19999999999999996 * COUNT(CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS FLOAT), 0 - ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) DESC) - THEN CHAR_LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + ) < ROW_NUMBER() OVER (PARTITION BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) DESC) + THEN CHAR_LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ELSE NULL END AS expr_87 FROM main.sbTicker diff --git a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql index 3a89a1dc1..74738286b 100644 --- a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql +++ b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql @@ -4,10 +4,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 
1.0 ) - ( CAST(( - COUNT(1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(1) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 @@ -17,10 +17,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( CAST(( - COUNT(2) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(2) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 @@ -30,10 +30,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( CAST(( - COUNT(-1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(-1) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 @@ -43,10 +43,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( CAST(( - COUNT(-3) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(-3) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 @@ -56,10 +56,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' 
THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( CAST(( - COUNT(0) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(0) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 @@ -69,10 +69,10 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') - 1.0 + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 ) - ( CAST(( - COUNT(0.5) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(0.5) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 @@ -82,53 +82,49 @@ WITH _t1 AS ( CASE WHEN ABS( ( - ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) DESC) - 1.0 + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) DESC) - 1.0 ) - ( CAST(( - COUNT( - LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) - ) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + COUNT(LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 - THEN LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + THEN LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ELSE NULL END AS expr_79, CASE - WHEN CAST(0.9 * COUNT(1) OVER (PARTITION BY LENGTH(CASE WHEN 
sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + WHEN CAST(0.9 * COUNT(1) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN 1 ELSE NULL END AS expr_80, CASE - WHEN CAST(0.8 * COUNT(2) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + WHEN CAST(0.8 * COUNT(2) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN 2 ELSE NULL END AS expr_81, CASE - WHEN CAST(0.7 * COUNT(-1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + WHEN CAST(0.7 * COUNT(-1) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN -1 ELSE NULL END AS expr_82, CASE - WHEN CAST(0.6 * COUNT(-3) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + WHEN CAST(0.6 * COUNT(-3) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN -3 ELSE NULL END AS 
expr_83, CASE - WHEN CAST(0.5 * COUNT(0) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + WHEN CAST(0.5 * COUNT(0) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN 0 ELSE NULL END AS expr_84, CASE - WHEN CAST(0.4 * COUNT(0.5) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY '1') + WHEN CAST(0.4 * COUNT(0.5) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') THEN 0.5 ELSE NULL END AS expr_85, CASE - WHEN CAST(0.19999999999999996 * COUNT( - LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) - ) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) DESC) - THEN LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + WHEN CAST(0.19999999999999996 * COUNT(LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) DESC) + THEN LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ELSE NULL END AS 
expr_87 FROM main.sbticker diff --git a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql index ac6ecaa8b..f4bf02ae7 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - COALESCE(SUM(CASE WHEN count_oid <> 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio + COALESCE(SUM(NULLIF(count_oid, 0)), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql index ac6ecaa8b..f4bf02ae7 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - COALESCE(SUM(CASE WHEN count_oid <> 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio + COALESCE(SUM(NULLIF(count_oid, 0)), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql index de72ee868..871e47887 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - CAST(COALESCE(SUM(CASE WHEN count_oid <> 0 THEN count_oid ELSE NULL END), 0) AS DOUBLE PRECISION) / COUNT(*) AS ratio + CAST(COALESCE(SUM(NULLIF(count_oid, 0)), 0) AS DOUBLE PRECISION) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql index e18408503..38ba10766 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT 
anything_continent AS continent, - COALESCE(SUM(CASE WHEN count_oid <> 0 THEN count_oid ELSE NULL END), 0) / COUNT(*) AS ratio + COALESCE(SUM(NULLIF(count_oid, 0)), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql index a5e24526b..dad2b9f3e 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - CAST(COALESCE(SUM(CASE WHEN count_oid <> 0 THEN count_oid ELSE NULL END), 0) AS REAL) / COUNT(*) AS ratio + CAST(COALESCE(SUM(NULLIF(count_oid, 0)), 0) AS REAL) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql index 286c4a7e3..bbbe209fb 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications + COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql index cf49f6bca..a0ad43018 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, - COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications + COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS count_publications FROM main.conference AS conference LEFT JOIN 
main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql index 5548d7996..850f09361 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications + COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql index 25eb987d3..4b8c78541 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications + COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql index 54502b0af..6ba14ab70 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS count_publications + COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid 
diff --git a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql index b8a7f31c7..2e23128a5 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications + COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql index 866d80622..92d1cbe17 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, - COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications + COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql index 244567dc4..73c660fc1 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications + COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql 
b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql index 3b3b803af..92a20b415 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications + COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql index 110473596..3e4a133a5 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(CASE WHEN COUNT(publication.cid) <> 0 THEN COUNT(publication.cid) ELSE NULL END, 0) AS num_publications + COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql index 2e56fb6cf..30a2cb178 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(CASE WHEN count_car_id <> 0 THEN count_car_id ELSE NULL END), 0) AS num_sales + COALESCE(SUM(NULLIF(count_car_id, 0)), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql index 2e56fb6cf..30a2cb178 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql +++ 
b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(CASE WHEN count_car_id <> 0 THEN count_car_id ELSE NULL END), 0) AS num_sales + COALESCE(SUM(NULLIF(count_car_id, 0)), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql index cda148aab..50a39be6d 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(CASE WHEN count_car_id <> 0 THEN count_car_id ELSE NULL END), 0) AS num_sales + COALESCE(SUM(NULLIF(count_car_id, 0)), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql index 954ae6bad..f678569a6 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(CASE WHEN count_car_id <> 0 THEN count_car_id ELSE NULL END), 0) AS num_sales + COALESCE(SUM(NULLIF(count_car_id, 0)), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql index cda148aab..50a39be6d 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(CASE WHEN count_car_id <> 0 THEN count_car_id ELSE NULL END), 0) AS num_sales + COALESCE(SUM(NULLIF(count_car_id, 0)), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git 
a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql index c4dc34942..3e8501113 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql @@ -7,11 +7,10 @@ WITH _s1 AS ( sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), 30, DAY) ) SELECT - COALESCE(CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + COALESCE(NULLIF(COUNT(_s1.car_id), 0), 0) AS num_sales, CASE WHEN ( - CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 - AND NOT CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 ) THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL diff --git a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql index b4e0ae20c..8639c976c 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql @@ -7,11 +7,10 @@ WITH _s1 AS ( sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '30' DAY) ) SELECT - COALESCE(CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + COALESCE(NULLIF(COUNT(_s1.car_id), 0), 0) AS num_sales, CASE WHEN ( - CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 - AND NOT CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 ) THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL diff --git a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql index 0bc1b1044..ff9c3a4e7 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql @@ -7,11 +7,10 @@ WITH _s1 AS ( 
sale_date >= CURRENT_TIMESTAMP - INTERVAL '30 DAY' ) SELECT - COALESCE(CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + COALESCE(NULLIF(COUNT(_s1.car_id), 0), 0) AS num_sales, CASE WHEN ( - CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 - AND NOT CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 ) THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL diff --git a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql index 3c433feb5..f75f4ad36 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql @@ -7,11 +7,10 @@ WITH _s1 AS ( sale_date >= DATEADD(DAY, -30, CURRENT_TIMESTAMP()) ) SELECT - COALESCE(CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + COALESCE(NULLIF(COUNT(_s1.car_id), 0), 0) AS num_sales, CASE WHEN ( - CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 - AND NOT CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 ) THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL diff --git a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql index 55c0bbf21..0f88b4632 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql @@ -7,11 +7,10 @@ WITH _s1 AS ( sale_date >= DATETIME('now', '-30 day') ) SELECT - COALESCE(CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END, 0) AS num_sales, + COALESCE(NULLIF(COUNT(_s1.car_id), 0), 0) AS num_sales, CASE WHEN ( - CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END > 0 - AND NOT 
CASE WHEN COUNT(_s1.car_id) <> 0 THEN COUNT(_s1.car_id) ELSE NULL END IS NULL + NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 ) THEN COALESCE(SUM(_s1.sale_price), 0) ELSE NULL diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 4c667c939..93c10b5b1 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COALESCE(CASE WHEN COUNT(_s1.receiver_id) <> 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, + COALESCE(NULLIF(COUNT(_s1.receiver_id), 0), 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql index 2e5188339..b96665551 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COALESCE(CASE WHEN COUNT(_s1.receiver_id) <> 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, + COALESCE(NULLIF(COUNT(_s1.receiver_id), 0), 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql index 60518b318..e04f6eba9 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT MAX(merchants.name) AS merchant_name, - COALESCE(CASE WHEN COUNT(_s1.receiver_id) <> 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, + 
COALESCE(NULLIF(COUNT(_s1.receiver_id), 0), 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql index 2d71bf033..6c9df9c12 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COALESCE(CASE WHEN COUNT(_s1.receiver_id) <> 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, + COALESCE(NULLIF(COUNT(_s1.receiver_id), 0), 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index 2226c08c1..15fb10f35 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT MAX(merchants.name) AS merchant_name, - COALESCE(CASE WHEN COUNT(_s1.receiver_id) <> 0 THEN COUNT(_s1.receiver_id) ELSE NULL END, 0) AS total_transactions, + COALESCE(NULLIF(COUNT(_s1.receiver_id), 0), 0) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql index 7c3fa09e1..3ce047156 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql @@ -46,12 +46,9 @@ WITH _s0 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM( - ( - CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END > 0 - AND NOT CASE WHEN count_search_id <> 0 THEN 
count_search_id ELSE NULL END IS NULL - ) - ) AS sum_is_intra_season + SUM(( + NOT NULLIF(count_search_id, 0) IS NULL AND NULLIF(count_search_id, 0) > 0 + )) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql index 6a16f47ab..19c0198c5 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_mysql.sql @@ -46,12 +46,9 @@ WITH _s0 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM( - ( - CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END > 0 - AND NOT CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END IS NULL - ) - ) AS sum_is_intra_season + SUM(( + NOT NULLIF(count_search_id, 0) IS NULL AND NULLIF(count_search_id, 0) > 0 + )) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql index 446f0aa62..d090f9eeb 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_postgres.sql @@ -49,8 +49,7 @@ WITH _s0 AS ( SUM( CASE WHEN ( - CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END > 0 - AND NOT CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END IS NULL + NOT NULLIF(count_search_id, 0) IS NULL AND NULLIF(count_search_id, 0) > 0 ) THEN 1 ELSE 0 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql index 6195f1e37..c9418b406 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql @@ -46,12 +46,9 @@ WITH _s0 AS ( SELECT s_name, COUNT(*) AS n_rows, - COUNT_IF( - ( - CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END > 0 - 
AND NOT CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END IS NULL - ) - ) AS sum_is_intra_season + COUNT_IF(( + NOT NULLIF(count_search_id, 0) IS NULL AND NULLIF(count_search_id, 0) > 0 + )) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql index 204d8d3bd..6b664ade5 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql @@ -48,12 +48,9 @@ WITH _s0 AS ( SELECT s_name, COUNT(*) AS n_rows, - SUM( - ( - CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END > 0 - AND NOT CASE WHEN count_search_id <> 0 THEN count_search_id ELSE NULL END IS NULL - ) - ) AS sum_is_intra_season + SUM(( + NOT NULLIF(count_search_id, 0) IS NULL AND NULLIF(count_search_id, 0) > 0 + )) AS sum_is_intra_season FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql index c5d1f2e61..56cc7cf5b 100644 --- a/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql @@ -1,6 +1,6 @@ WITH _s0 AS ( SELECT - GROUP_CONCAT(CASE WHEN r_name <> 'EUROPE' THEN r_name ELSE NULL END, ', ') AS agg_1, + GROUP_CONCAT(NULLIF(r_name, 'EUROPE'), ', ') AS agg_1, GROUP_CONCAT(r_name) AS combine_strings_r_name FROM tpch.region ), _s1 AS ( diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql index 58fd231a6..20c766e89 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id <> 0 
THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql index dc13b2fb2..bbc0e0461 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql index 209791276..90a80c8b1 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql index 09346adb4..600d1f4f6 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git 
a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql index a40016e67..8532a5ea6 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql index 0bc9bd4b2..10925d008 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql index 8aeb57830..ec9aac85b 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql index 
5e3d70538..1b7347723 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql index 9711eaa46..285263516 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql index 503174b3c..8f5994503 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END) AS sum_n_rows + SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql index 7c785df60..b7b08d902 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql +++ 
b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, + COALESCE(SUM(NULLIF(count_in_device_id, 0)), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql index 8a3d4d7b2..5aac2d9f9 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( DEVICES.de_id ), _s5 AS ( SELECT - COALESCE(SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, + COALESCE(SUM(NULLIF(count_in_device_id, 0)), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql index 8a33e5c9f..adc28a5fd 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, + COALESCE(SUM(NULLIF(count_in_device_id, 0)), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql 
b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql index 9f149a6f1..b68ca2880 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, + COALESCE(SUM(NULLIF(count_in_device_id, 0)), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql index 692772d12..c5d444f5a 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(CASE WHEN count_in_device_id <> 0 THEN count_in_device_id ELSE NULL END), 0) AS sum_n_incidents, + COALESCE(SUM(NULLIF(count_in_device_id, 0)), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index 7cea9ff0c..6a785257f 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -49,14 +49,7 @@ WITH _t5 AS ( ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, - COALESCE( - CASE - WHEN COUNT(_s13.anything_l_suppkey) <> 0 - THEN COUNT(_s13.anything_l_suppkey) - ELSE NULL - END, - 0 - ) AS NUMWAIT + COALESCE(NULLIF(COUNT(_s13.anything_l_suppkey), 0), 0) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey diff --git 
a/tests/test_sql_refsols/tpch_q21_mysql.sql b/tests/test_sql_refsols/tpch_q21_mysql.sql index 77437cd1a..9c3775ded 100644 --- a/tests/test_sql_refsols/tpch_q21_mysql.sql +++ b/tests/test_sql_refsols/tpch_q21_mysql.sql @@ -53,14 +53,7 @@ WITH _t5 AS ( ) SELECT ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, - COALESCE( - CASE - WHEN COUNT(_s13.anything_l_suppkey) <> 0 - THEN COUNT(_s13.anything_l_suppkey) - ELSE NULL - END, - 0 - ) AS NUMWAIT + COALESCE(NULLIF(COUNT(_s13.anything_l_suppkey), 0), 0) AS NUMWAIT FROM tpch.SUPPLIER AS SUPPLIER JOIN tpch.NATION AS NATION ON NATION.n_name = 'SAUDI ARABIA' AND NATION.n_nationkey = SUPPLIER.s_nationkey diff --git a/tests/test_sql_refsols/tpch_q21_postgres.sql b/tests/test_sql_refsols/tpch_q21_postgres.sql index fe5d9711c..b78023717 100644 --- a/tests/test_sql_refsols/tpch_q21_postgres.sql +++ b/tests/test_sql_refsols/tpch_q21_postgres.sql @@ -53,14 +53,7 @@ WITH _t5 AS ( ) SELECT MAX(supplier.s_name) AS S_NAME, - COALESCE( - CASE - WHEN COUNT(_s13.anything_l_suppkey) <> 0 - THEN COUNT(_s13.anything_l_suppkey) - ELSE NULL - END, - 0 - ) AS NUMWAIT + COALESCE(NULLIF(COUNT(_s13.anything_l_suppkey), 0), 0) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/tpch_q21_snowflake.sql b/tests/test_sql_refsols/tpch_q21_snowflake.sql index b30608553..f83e82485 100644 --- a/tests/test_sql_refsols/tpch_q21_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q21_snowflake.sql @@ -53,14 +53,7 @@ WITH _t5 AS ( ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, - COALESCE( - CASE - WHEN COUNT(_s13.anything_l_suppkey) <> 0 - THEN COUNT(_s13.anything_l_suppkey) - ELSE NULL - END, - 0 - ) AS NUMWAIT + COALESCE(NULLIF(COUNT(_s13.anything_l_suppkey), 0), 0) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey diff 
--git a/tests/test_sql_refsols/tpch_q21_sqlite.sql b/tests/test_sql_refsols/tpch_q21_sqlite.sql index 2a8b65cd4..c5ceb7d67 100644 --- a/tests/test_sql_refsols/tpch_q21_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q21_sqlite.sql @@ -53,14 +53,7 @@ WITH _t5 AS ( ) SELECT MAX(supplier.s_name) AS S_NAME, - COALESCE( - CASE - WHEN COUNT(_s13.anything_l_suppkey) <> 0 - THEN COUNT(_s13.anything_l_suppkey) - ELSE NULL - END, - 0 - ) AS NUMWAIT + COUNT(_s13.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey From cbc54087bc9d2938becd760f3f9c0f96e6e948c6 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 17:45:17 -0400 Subject: [PATCH 120/143] Updating plans, still need to fix common_prefix_y --- tests/test_sql_refsols/defog_academic_gen17_ansi.sql | 2 +- tests/test_sql_refsols/defog_academic_gen17_mysql.sql | 2 +- tests/test_sql_refsols/defog_academic_gen17_postgres.sql | 2 +- tests/test_sql_refsols/defog_academic_gen17_snowflake.sql | 2 +- tests/test_sql_refsols/defog_academic_gen17_sqlite.sql | 2 +- tests/test_sql_refsols/defog_academic_gen19_ansi.sql | 2 +- tests/test_sql_refsols/defog_academic_gen19_mysql.sql | 2 +- tests/test_sql_refsols/defog_academic_gen19_postgres.sql | 2 +- tests/test_sql_refsols/defog_academic_gen19_snowflake.sql | 2 +- tests/test_sql_refsols/defog_academic_gen19_sqlite.sql | 2 +- tests/test_sql_refsols/defog_dealership_adv4_ansi.sql | 2 +- tests/test_sql_refsols/defog_dealership_adv4_mysql.sql | 2 +- tests/test_sql_refsols/defog_dealership_adv4_postgres.sql | 2 +- tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql | 2 +- tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql | 2 +- tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql | 2 +- tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql | 2 +- tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql | 2 +- 
tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql | 2 +- tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q21_ansi.sql | 2 +- tests/test_sql_refsols/tpch_q21_mysql.sql | 2 +- tests/test_sql_refsols/tpch_q21_postgres.sql | 2 +- tests/test_sql_refsols/tpch_q21_snowflake.sql | 2 +- 24 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql index bbbe209fb..504cb2598 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_ansi.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS count_publications + COUNT(publication.cid) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql index a0ad43018..b8207083a 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_mysql.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, - COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS count_publications + COUNT(publication.cid) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql index 850f09361..4635122b3 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_postgres.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS count_publications + COUNT(publication.cid) AS count_publications FROM 
main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql index 4b8c78541..a4ef4321b 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_snowflake.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS count_publications + COUNT(publication.cid) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql index 6ba14ab70..492220fdb 100644 --- a/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen17_sqlite.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS count_publications + COUNT(publication.cid) AS count_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql index 2e23128a5..38b7fbcc1 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_ansi.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS num_publications + COUNT(publication.cid) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql index 92d1cbe17..846c76c9d 100644 --- 
a/tests/test_sql_refsols/defog_academic_gen19_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_mysql.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) COLLATE utf8mb4_bin AS name, - COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS num_publications + COUNT(publication.cid) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql index 73c660fc1..4b7b9b29b 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_postgres.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS num_publications + COUNT(publication.cid) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql index 92a20b415..5cfa8fc98 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_snowflake.sql @@ -1,6 +1,6 @@ SELECT ANY_VALUE(conference.name) AS name, - COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS num_publications + COUNT(publication.cid) AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql index 3e4a133a5..4605e9446 100644 --- a/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen19_sqlite.sql @@ -1,6 +1,6 @@ SELECT MAX(conference.name) AS name, - COALESCE(NULLIF(COUNT(publication.cid), 0), 0) AS num_publications + COUNT(publication.cid) 
AS num_publications FROM main.conference AS conference LEFT JOIN main.publication AS publication ON conference.cid = publication.cid diff --git a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql index 3e8501113..a7f38d408 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql @@ -7,7 +7,7 @@ WITH _s1 AS ( sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), 30, DAY) ) SELECT - COALESCE(NULLIF(COUNT(_s1.car_id), 0), 0) AS num_sales, + COUNT(_s1.car_id) AS num_sales, CASE WHEN ( NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql index 8639c976c..71a7d9b41 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql @@ -7,7 +7,7 @@ WITH _s1 AS ( sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '30' DAY) ) SELECT - COALESCE(NULLIF(COUNT(_s1.car_id), 0), 0) AS num_sales, + COUNT(_s1.car_id) AS num_sales, CASE WHEN ( NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql index ff9c3a4e7..72140b7e6 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql @@ -7,7 +7,7 @@ WITH _s1 AS ( sale_date >= CURRENT_TIMESTAMP - INTERVAL '30 DAY' ) SELECT - COALESCE(NULLIF(COUNT(_s1.car_id), 0), 0) AS num_sales, + COUNT(_s1.car_id) AS num_sales, CASE WHEN ( NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql index f75f4ad36..27cf9deaf 100644 --- 
a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql @@ -7,7 +7,7 @@ WITH _s1 AS ( sale_date >= DATEADD(DAY, -30, CURRENT_TIMESTAMP()) ) SELECT - COALESCE(NULLIF(COUNT(_s1.car_id), 0), 0) AS num_sales, + COUNT(_s1.car_id) AS num_sales, CASE WHEN ( NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql index 0f88b4632..90d404adf 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql @@ -7,7 +7,7 @@ WITH _s1 AS ( sale_date >= DATETIME('now', '-30 day') ) SELECT - COALESCE(NULLIF(COUNT(_s1.car_id), 0), 0) AS num_sales, + COUNT(_s1.car_id) AS num_sales, CASE WHEN ( NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 93c10b5b1..9da610e5d 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COALESCE(NULLIF(COUNT(_s1.receiver_id), 0), 0) AS total_transactions, + COUNT(_s1.receiver_id) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql index b96665551..f7f6babf0 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_mysql.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COALESCE(NULLIF(COUNT(_s1.receiver_id), 0), 0) AS total_transactions, + COUNT(_s1.receiver_id) AS 
total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql index e04f6eba9..1c7b93bbe 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_postgres.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT MAX(merchants.name) AS merchant_name, - COALESCE(NULLIF(COUNT(_s1.receiver_id), 0), 0) AS total_transactions, + COUNT(_s1.receiver_id) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql index 6c9df9c12..3618ff61c 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT ANY_VALUE(merchants.name) AS merchant_name, - COALESCE(NULLIF(COUNT(_s1.receiver_id), 0), 0) AS total_transactions, + COUNT(_s1.receiver_id) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index 15fb10f35..43b6b1cf6 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -9,7 +9,7 @@ WITH _s1 AS ( ) SELECT MAX(merchants.name) AS merchant_name, - COALESCE(NULLIF(COUNT(_s1.receiver_id), 0), 0) AS total_transactions, + COUNT(_s1.receiver_id) AS total_transactions, COALESCE(SUM(_s1.amount), 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index 
6a785257f..a53a1e9b5 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -49,7 +49,7 @@ WITH _t5 AS ( ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, - COALESCE(NULLIF(COUNT(_s13.anything_l_suppkey), 0), 0) AS NUMWAIT + COUNT(_s13.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/tpch_q21_mysql.sql b/tests/test_sql_refsols/tpch_q21_mysql.sql index 9c3775ded..302c196ca 100644 --- a/tests/test_sql_refsols/tpch_q21_mysql.sql +++ b/tests/test_sql_refsols/tpch_q21_mysql.sql @@ -53,7 +53,7 @@ WITH _t5 AS ( ) SELECT ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, - COALESCE(NULLIF(COUNT(_s13.anything_l_suppkey), 0), 0) AS NUMWAIT + COUNT(_s13.anything_l_suppkey) AS NUMWAIT FROM tpch.SUPPLIER AS SUPPLIER JOIN tpch.NATION AS NATION ON NATION.n_name = 'SAUDI ARABIA' AND NATION.n_nationkey = SUPPLIER.s_nationkey diff --git a/tests/test_sql_refsols/tpch_q21_postgres.sql b/tests/test_sql_refsols/tpch_q21_postgres.sql index b78023717..79c4527cd 100644 --- a/tests/test_sql_refsols/tpch_q21_postgres.sql +++ b/tests/test_sql_refsols/tpch_q21_postgres.sql @@ -53,7 +53,7 @@ WITH _t5 AS ( ) SELECT MAX(supplier.s_name) AS S_NAME, - COALESCE(NULLIF(COUNT(_s13.anything_l_suppkey), 0), 0) AS NUMWAIT + COUNT(_s13.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/tpch_q21_snowflake.sql b/tests/test_sql_refsols/tpch_q21_snowflake.sql index f83e82485..aa6a0445b 100644 --- a/tests/test_sql_refsols/tpch_q21_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q21_snowflake.sql @@ -53,7 +53,7 @@ WITH _t5 AS ( ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, - COALESCE(NULLIF(COUNT(_s13.anything_l_suppkey), 0), 0) AS NUMWAIT + 
COUNT(_s13.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey From f7091d800b22956960a8c65f29661ad05b60b3f9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 18:39:56 -0400 Subject: [PATCH 121/143] Fixing common_prefix_y [RUN CI] --- tests/test_pipeline_common_prefix.py | 5 +++-- tests/test_plan_refsols/common_prefix_y.txt | 3 ++- .../common_prefix_pydough_functions.py | 14 ++++++++------ 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/test_pipeline_common_prefix.py b/tests/test_pipeline_common_prefix.py index 3796d6b44..a7bcf62c6 100644 --- a/tests/test_pipeline_common_prefix.py +++ b/tests/test_pipeline_common_prefix.py @@ -623,12 +623,13 @@ { "name": [ f"Customer#{i:09}" - for i in (138841, 36091, 54952, 103768, 46081) + for i in (6434, 45280, 60493, 87616, 132775) ], - "n_orders": [21, 20, 19, 19, 17], + "n_orders": [2, 2, 2, 2, 2], } ), "common_prefix_y", + order_sensitive=True, ), id="common_prefix_y", ), diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index 931501010..9e388ef51 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -6,6 +6,7 @@ ROOT(columns=[('name', anything_c_name), ('n_orders', DEFAULT_TO(KEEP_IF(count_a PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric)}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + 
FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_pydough_functions/common_prefix_pydough_functions.py b/tests/test_pydough_functions/common_prefix_pydough_functions.py index 9dab96308..fc90c1267 100644 --- a/tests/test_pydough_functions/common_prefix_pydough_functions.py +++ b/tests/test_pydough_functions/common_prefix_pydough_functions.py @@ -428,12 +428,14 @@ def common_prefix_x(): def common_prefix_y(): - # For each customer who has NEVER made a zero-tax purchase, count how - # many total orders they have made. Keep the top 5 customers by number - # of orders, breaking ties by customer name. - return ( - customers.WHERE(HASNOT(orders.lines.WHERE(tax == 0))) - .CALCULATE(name, n_orders=COUNT(orders)) + # For each customer who has NEVER made a zero-tax purchase through clerk + # number 1, count how many total orders they have made through that clerk. + # Keep the top 5 customers by number of orders, breaking ties by customer + # name. 
+ clerk_one_orders = orders.WHERE(clerk == "Clerk#000000001") + return ( + customers.WHERE(HASNOT(clerk_one_orders.lines.WHERE(tax == 0))) + .CALCULATE(name, n_orders=COUNT(clerk_one_orders)) .TOP_K(5, by=(n_orders.DESC(), name.ASC())) ) From 2da07fed831de2f288d99d18f99d36e1c02315c2 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 22:53:33 -0400 Subject: [PATCH 122/143] Additional revisions and comments [RUN CI] --- .../conversion/join_aggregate_transpose.py | 65 +++++++++++++++---- .../conversion/relational_simplification.py | 27 ++++++-- tests/test_plan_refsols/common_prefix_am.txt | 2 +- tests/test_plan_refsols/common_prefix_an.txt | 2 +- tests/test_plan_refsols/correl_35.txt | 2 +- ..._year_cumulative_incident_rate_overall.txt | 2 +- .../common_prefix_pydough_functions.py | 6 +- .../correlated_pydough_functions.py | 6 +- .../defog_test_functions.py | 20 +++--- .../epoch_pydough_functions.py | 2 +- .../technograph_pydough_functions.py | 2 +- tests/test_sql_refsols/correl_35_sqlite.sql | 2 +- .../defog_academic_gen11_ansi.sql | 2 +- .../defog_academic_gen11_mysql.sql | 2 +- .../defog_academic_gen11_postgres.sql | 2 +- .../defog_academic_gen11_snowflake.sql | 2 +- .../defog_academic_gen11_sqlite.sql | 2 +- .../defog_academic_gen12_ansi.sql | 2 +- .../defog_academic_gen12_mysql.sql | 2 +- .../defog_academic_gen12_postgres.sql | 6 +- .../defog_academic_gen12_snowflake.sql | 2 +- .../defog_academic_gen12_sqlite.sql | 2 +- .../defog_academic_gen13_ansi.sql | 8 +-- .../defog_academic_gen13_mysql.sql | 8 +-- .../defog_academic_gen13_postgres.sql | 8 +-- .../defog_academic_gen13_snowflake.sql | 8 +-- .../defog_academic_gen13_sqlite.sql | 8 +-- .../defog_academic_gen14_ansi.sql | 2 +- .../defog_academic_gen14_mysql.sql | 2 +- .../defog_academic_gen14_postgres.sql | 2 +- .../defog_academic_gen14_snowflake.sql | 2 +- .../defog_academic_gen14_sqlite.sql | 2 +- .../defog_broker_adv8_ansi.sql | 2 +- .../defog_broker_adv8_mysql.sql | 2 +- 
.../defog_broker_adv8_postgres.sql | 2 +- .../defog_broker_adv8_snowflake.sql | 2 +- .../defog_broker_adv8_sqlite.sql | 2 +- .../defog_dealership_adv4_ansi.sql | 25 ++----- .../defog_dealership_adv4_mysql.sql | 25 ++----- .../defog_dealership_adv4_postgres.sql | 25 ++----- .../defog_dealership_adv4_snowflake.sql | 25 ++----- .../defog_dealership_adv4_sqlite.sql | 24 ++----- .../defog_ewallet_adv4_ansi.sql | 2 +- .../defog_ewallet_adv4_mysql.sql | 2 +- .../defog_ewallet_adv4_postgres.sql | 2 +- .../defog_ewallet_adv4_snowflake.sql | 2 +- .../defog_ewallet_adv4_sqlite.sql | 2 +- .../defog_ewallet_basic8_ansi.sql | 5 +- .../defog_ewallet_basic8_mysql.sql | 5 +- .../defog_ewallet_basic8_postgres.sql | 5 +- .../defog_ewallet_basic8_snowflake.sql | 5 +- .../defog_ewallet_basic8_sqlite.sql | 5 +- ..._cumulative_incident_rate_overall_ansi.sql | 2 +- ...cumulative_incident_rate_overall_mysql.sql | 2 +- ...ulative_incident_rate_overall_postgres.sql | 2 +- ...lative_incident_rate_overall_snowflake.sql | 2 +- ...umulative_incident_rate_overall_sqlite.sql | 2 +- 57 files changed, 161 insertions(+), 230 deletions(-) diff --git a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index 5a9bc75fd..fa6adbf1c 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -1,4 +1,7 @@ -""" """ +""" +Logical plan transformation to pull aggregates above joins when possible for +optimization purposes. +""" __all__ = ["pull_aggregates_above_joins"] @@ -31,7 +34,9 @@ class JoinAggregateTransposeShuttle(RelationalShuttle): """ - TODO + Relational shuttle Transposes joins and aggregates in the relational + algebra, moving the currently aggregate underneath the join to be above + the join instead for performance gains. 
""" left_join_case_ops = { @@ -48,7 +53,8 @@ class JoinAggregateTransposeShuttle(RelationalShuttle): pydop.POPULATION_STD, } """ - TODO: add description + The set of aggregation operators that are safe transpose under a LEFT JOIN + when the aggregate is on the right side of the join. """ def __init__(self): @@ -175,7 +181,8 @@ def join_aggregate_transpose( if not is_left: agg_key_refs, non_agg_key_refs = non_agg_key_refs, agg_key_refs - # TODO ADD COMMENTS + # Obtain the input aliases for both sides of the join, identified with + # which one belongs to the aggregate versus the other input. agg_alias: str | None = ( join.default_input_aliases[0] if is_left else join.default_input_aliases[1] ) @@ -214,6 +221,7 @@ def join_aggregate_transpose( ): sentinel_join_name: str | None = None if existing_sentinel is not None: + # If there is an existing sentinel column from before, use it. for col_name, col_expr in join.columns.items(): if ( isinstance(col_expr, ColumnReference) @@ -222,6 +230,7 @@ def join_aggregate_transpose( sentinel_join_name = col_name break else: + # Otherwise, create a new COUNT(*) column for that purpose. agg_name = self.generate_name("n_rows", aggregate.columns) aggregate.columns[agg_name] = aggregate.aggregations[agg_name] = ( CallExpression( @@ -247,7 +256,15 @@ def join_aggregate_transpose( else: new_cardinality = new_cardinality.add_plural() - # TODO ADD COMMENTS + # Build up the new columns for the join and aggregate, as well as a + # substitution mapping to remap references from the old join to the new + # join and aggregate, and another to remap references used by the join + # condition. 
The columns for the new aggregate will start out with the + # same keys and aggregations as the old one, since the columns from the + # aggregate's input will be passed through the join without any + # renaming, then all of the other columns from the non-aggregate side of + # the join will be added as ANYTHING aggregations to the new aggregate + # so that they can be referenced in the final projection. new_join_columns: dict[str, RelationalExpression] = {} new_aggregate_keys: dict[str, RelationalExpression] = dict(aggregate.keys) new_aggregate_aggs: dict[str, CallExpression] = dict(aggregate.aggregations) @@ -259,15 +276,15 @@ def join_aggregate_transpose( add_input_name(key_expr, agg_alias) ) - # TODO ADD COMMENTS + # Extract the node that is the input to the aggregate, as well as the + # other input to the join, as these shall be the two inputs to the new + # join. agg_input: RelationalNode = aggregate.inputs[0] non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0] new_join_inputs: list[RelationalNode] = ( [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] ) - new_project_columns: dict[str, RelationalExpression] = {} - # Start by placing all of the columns from the aggregate node's input # into the join's columns so that the aggregate keys/aggregations can # refer to them with the same names, without any renaming caused by @@ -285,7 +302,10 @@ def join_aggregate_transpose( ColumnReference(col_name, col_expr.data_type) ) - # TODO ADD COMMENTS + # Iterate through all of the columns from the non-aggregate side of + # the join, adding them as ANYTHING aggregations to the new aggregate + # so that they can be referenced in the final projection, while also + # adding them as regular columns to the new join. 
for col_name, col_expr in non_agg_input.columns.items(): join_name = self.generate_name(col_name, new_join_columns) new_join_columns[join_name] = ColumnReference( @@ -327,6 +347,7 @@ def join_aggregate_transpose( [sentinel_column, LiteralExpression(0, NumericType())], ) + # A function to transform `X` -> `KEEP_IF(X, sentinel_column != 0)`` def sentinel_fn(expr: RelationalExpression) -> RelationalExpression: return CallExpression( pydop.KEEP_IF, expr.data_type, [expr, sentinel_cmp] @@ -339,11 +360,18 @@ def sentinel_fn(expr: RelationalExpression) -> RelationalExpression: ) join_sub[agg_ref_expr] = sentinel_fn(join_sub[agg_ref_expr]) - # TODO ADD COMMENTS + # Create the columns of the final projection which will occur after + # the aggregate to rename columns as needed. This is done by finding + # all of the columns from the original join's output, and applying + # the join substitution to them so that they refer to the correct + # columns from the new aggregate. + new_project_columns: dict[str, RelationalExpression] = {} for col_name, col_expr in join.columns.items(): new_project_columns[col_name] = apply_substitution(col_expr, join_sub, {}) - # TODO ADD COMMENTS + # Build the new Join by joining the aggregate's input with the other + # side of the join, using the remapped join condition, and the new + # columns and cardinalities. new_join: Join = Join( new_join_inputs, apply_substitution(join.condition, join_cond_sub, {}), @@ -354,19 +382,28 @@ def sentinel_fn(expr: RelationalExpression) -> RelationalExpression: join.correl_name, ) - # TODO ADD COMMENTS + # Build the new Aggregate node on top of the new Join, using the + # remapped keys and additional aggregations. new_aggregate: Aggregate = Aggregate( new_join, new_aggregate_keys, new_aggregate_aggs ) - # TODO ADD COMMENTS + # Build the new Project node on top of the new Aggregate, using the + # remapped columns. 
new_project: Project = Project(new_aggregate, new_project_columns) return new_project def pull_aggregates_above_joins(node: RelationalRoot) -> RelationalNode: """ - TODO + Runs the logical plan transformation to pull aggregates above joins when + possible for optimization purposes. + + Args: + `node`: The root relational node to transform. + + Returns: + The transformed relational tree. """ shuttle: JoinAggregateTransposeShuttle = JoinAggregateTransposeShuttle() return node.accept_shuttle(shuttle) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 4eaf33e7a..33410ea1f 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -1177,11 +1177,15 @@ def simplify_function_call( case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: match (expr.inputs[0], expr.op, expr.inputs[1]): # x > y is True if x is positive and y is a literal that is - # zero or negative. The same goes for x >= y. - case (_, pydop.GRT, LiteralExpression()) | ( - _, - pydop.GEQ, - LiteralExpression(), + # zero or negative. The same goes for x != y and x >= y. 
+ case ( + (_, pydop.GRT, LiteralExpression()) + | (_, pydop.NEQ, LiteralExpression()) + | ( + _, + pydop.GEQ, + LiteralExpression(), + ) ) if ( isinstance(expr.inputs[1].value, (int, float, bool)) and expr.inputs[1].value <= 0 @@ -1206,6 +1210,19 @@ def simplify_function_call( not_null=True, not_negative=True, positive=True ) + # x != y is True if x is non-negative and y is a literal + # that is negative + case (_, pydop.NEQ, LiteralExpression()) if ( + isinstance(expr.inputs[1].value, (int, float, bool)) + and expr.inputs[1].value < 0 + and arg_predicates[0].not_null + and arg_predicates[0].not_negative + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + # The rest of the case of x CMP y can be constant folded if # both x and y are literals. case (LiteralExpression(), _, LiteralExpression()): diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 80a5efb02..b7a89f156 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -6,7 +6,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numer SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - FILTER(condition=sum_n_rows != 0:numeric & sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) JOIN(condition=t0.o_orderkey == 
t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index a2198f90f..0ba6c7410 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,5 +1,5 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric)), ('n_no_tax_discount', anything_n_rows_0)], orderings=[(c_custkey):asc_first]) - FILTER(condition=sum_n_rows != 0:numeric & DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric) > RELAVG(args=[DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric)], partition=[anything_c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric, columns={'anything_n_rows_0': anything_n_rows_0, 'c_custkey': c_custkey, 'count_anything_o_custkey': count_anything_o_custkey}) + FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) != 0:numeric & sum_n_rows != 0:numeric & DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric) > RELAVG(args=[DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric)], partition=[anything_c_nationkey], order=[]), columns={'anything_n_rows_0': anything_n_rows_0, 'c_custkey': c_custkey, 'count_anything_o_custkey': count_anything_o_custkey}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_nationkey': ANYTHING(c_nationkey), 'anything_n_rows_0': ANYTHING(n_rows_0), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 
'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'n_rows_0': t0.n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) diff --git a/tests/test_plan_refsols/correl_35.txt b/tests/test_plan_refsols/correl_35.txt index 34dcbfaf6..4aaab2b34 100644 --- a/tests/test_plan_refsols/correl_35.txt +++ b/tests/test_plan_refsols/correl_35.txt @@ -10,7 +10,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}) AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index 
92fd8f422..7c5da2407 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,5 +1,5 @@ ROOT(columns=[('yr', year_ca_dt), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year_ca_dt):asc_last], cumulative=True) / RELSUM(args=[sum_expr], partition=[], order=[(year_ca_dt):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * sum_expr - PREV(args=[sum_expr], partition=[], order=[(year_ca_dt):asc_last]) / PREV(args=[sum_expr], partition=[], order=[(year_ca_dt):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year_ca_dt):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year_ca_dt):asc_last]), 2:numeric)), ('bought', sum_expr), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year_ca_dt):asc_first]) - FILTER(condition=DEFAULT_TO(sum_expr_3, 0:numeric) > 0:numeric, columns={'sum_expr': sum_expr_3, 'sum_n_rows': sum_n_rows, 'year_ca_dt': year_ca_dt}) + FILTER(condition=DEFAULT_TO(sum_expr_3, 0:numeric) != 0:numeric, columns={'sum_expr': sum_expr_3, 'sum_n_rows': sum_n_rows, 'year_ca_dt': year_ca_dt}) AGGREGATE(keys={'year_ca_dt': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) diff --git a/tests/test_pydough_functions/common_prefix_pydough_functions.py b/tests/test_pydough_functions/common_prefix_pydough_functions.py index 
fc90c1267..93c88f337 100644 --- a/tests/test_pydough_functions/common_prefix_pydough_functions.py +++ b/tests/test_pydough_functions/common_prefix_pydough_functions.py @@ -728,7 +728,7 @@ def common_prefix_al(): .WHERE(HAS(selected_lines)) .TOP_K(10, by=key.ASC()) .CALCULATE(cust_key=key, n_orders=n_orders, n_no_tax_discount=n_no_tax_discount) - .WHERE(HAS(selected_part_purchase) & (COUNT(selected_part_purchase) > 0)) + .WHERE(HAS(selected_part_purchase) & (COUNT(selected_part_purchase) != 0)) ) @@ -745,7 +745,7 @@ def common_prefix_am(): .WHERE( HAS(selected_lines) & HAS(selected_part_purchase) - & (COUNT(selected_part_purchase) > 0) + & (COUNT(selected_part_purchase) != 0) ) .CALCULATE( cust_key=key, n_orders=n_orders, n_no_tax_discount=COUNT(selected_lines) @@ -765,7 +765,7 @@ def common_prefix_an(): .WHERE( (COUNT(orders) > RELAVG(COUNT(orders), per="nations")) & HAS(selected_part_purchase) - & (COUNT(selected_part_purchase) > 0) + & (COUNT(selected_part_purchase) != 0) ) .CALCULATE( cust_key=key, diff --git a/tests/test_pydough_functions/correlated_pydough_functions.py b/tests/test_pydough_functions/correlated_pydough_functions.py index d2f3d8d8e..8fe32bfa8 100644 --- a/tests/test_pydough_functions/correlated_pydough_functions.py +++ b/tests/test_pydough_functions/correlated_pydough_functions.py @@ -209,7 +209,7 @@ def correl_13(): supplier_info = suppliers.WHERE(nation_key <= 3).CALCULATE( avg_price=AVG(supply_records.part.retail_price) ) - selected_suppliers = supplier_info.WHERE(COUNT(selected_supply_records) > 0) + selected_suppliers = supplier_info.WHERE(COUNT(selected_supply_records) != 0) return TPCH.CALCULATE(n=COUNT(selected_suppliers)) @@ -748,7 +748,7 @@ def correl_35(): ) .order.WHERE(YEAR(order_date) == 1998) .CALCULATE(original_priority=order_priority) - .WHERE(COUNT(alt_orders) > 0) + .WHERE(COUNT(alt_orders) != 0) ) ) @@ -778,7 +778,7 @@ def correl_36(): ) .order.WHERE(YEAR(order_date) == 1998) .CALCULATE(original_priority=order_priority) - 
.WHERE(COUNT(alt_orders) > 0) + .WHERE(COUNT(alt_orders) != 0) ) ) diff --git a/tests/test_pydough_functions/defog_test_functions.py b/tests/test_pydough_functions/defog_test_functions.py index 35525bb36..8ee42a0fa 100644 --- a/tests/test_pydough_functions/defog_test_functions.py +++ b/tests/test_pydough_functions/defog_test_functions.py @@ -287,7 +287,7 @@ def impl_defog_broker_adv8(): & (date_time >= DATETIME("now", "start of week", "-1 week")) ) return Broker.CALCULATE( - n_transactions=KEEP_IF(COUNT(selected_txns), COUNT(selected_txns) > 0), + n_transactions=KEEP_IF(COUNT(selected_txns), COUNT(selected_txns) != 0), total_amount=SUM(selected_txns.amount), ) @@ -743,12 +743,14 @@ def impl_defog_dealership_adv4(): """ date_threshold = DATETIME("now", "-30 days") - selected_sales = sale_records.WHERE(sale_date >= date_threshold) + selected_sales = cars.WHERE(CONTAINS(LOWER(make), "toyota")).sale_records.WHERE( + sale_date >= date_threshold + ) - return cars.WHERE(CONTAINS(LOWER(make), "toyota")).CALCULATE( + return Dealership.CALCULATE( num_sales=COUNT(selected_sales), total_revenue=KEEP_IF( - SUM(selected_sales.sale_price), COUNT(selected_sales) > 0 + SUM(selected_sales.sale_price), COUNT(selected_sales) != 0 ), ) @@ -1331,7 +1333,7 @@ def impl_defog_ewallet_adv4(): # Calculate the number of transactions and the total amount for the filtered transactions return Ewallet.CALCULATE( num_transactions=COUNT(us_transactions), - total_amount=KEEP_IF(SUM(us_transactions.amount), COUNT(us_transactions) > 0), + total_amount=KEEP_IF(SUM(us_transactions.amount), COUNT(us_transactions) != 0), ) @@ -2564,7 +2566,7 @@ def impl_defog_academic_gen11(): n_pub = COUNT(publications) n_auth = COUNT(authors) return Academic.CALCULATE( - publication_to_author_ratio=n_pub / KEEP_IF(n_auth, n_auth > 0) + publication_to_author_ratio=n_pub / KEEP_IF(n_auth, n_auth != 0) ) @@ -2578,7 +2580,7 @@ def impl_defog_academic_gen12(): """ n_confs = SUM(PRESENT(publications.conference_id)) n_jours = 
SUM(PRESENT(publications.journal_id)) - return Academic.CALCULATE(ratio=n_confs / KEEP_IF(n_jours, n_jours > 0)) + return Academic.CALCULATE(ratio=n_confs / KEEP_IF(n_jours, n_jours != 0)) def impl_defog_academic_gen13(): @@ -2592,7 +2594,7 @@ def impl_defog_academic_gen13(): n_pubs = COUNT(domain_publications) n_keys = COUNT(domain_keywords) - return domains.CALCULATE(domain_id, ratio=n_pubs / KEEP_IF(n_keys, n_keys > 0)) + return domains.CALCULATE(domain_id, ratio=n_pubs / KEEP_IF(n_keys, n_keys != 0)) def impl_defog_academic_gen14(): @@ -2609,7 +2611,7 @@ def impl_defog_academic_gen14(): year, num_publications=n_pubs, num_journals=n_jours, - ratio=n_pubs / KEEP_IF(n_jours, n_jours > 0), + ratio=n_pubs / KEEP_IF(n_jours, n_jours != 0), ) diff --git a/tests/test_pydough_functions/epoch_pydough_functions.py b/tests/test_pydough_functions/epoch_pydough_functions.py index 98560a12f..15a5f851c 100644 --- a/tests/test_pydough_functions/epoch_pydough_functions.py +++ b/tests/test_pydough_functions/epoch_pydough_functions.py @@ -204,7 +204,7 @@ def overlapping_event_searches_per_user(): # most such searches, breaking ties alphabetically. 
same_event_other_user = events.searches.user.WHERE(name != original_user_name) selected_searches = searches.WHERE( - (COUNT(same_event_other_user) > 0) & HAS(same_event_other_user) + (COUNT(same_event_other_user) != 0) & HAS(same_event_other_user) ) return ( users.CALCULATE(original_user_name=name) diff --git a/tests/test_pydough_functions/technograph_pydough_functions.py b/tests/test_pydough_functions/technograph_pydough_functions.py index 54e875c7c..99230f9d5 100644 --- a/tests/test_pydough_functions/technograph_pydough_functions.py +++ b/tests/test_pydough_functions/technograph_pydough_functions.py @@ -158,7 +158,7 @@ def year_cumulative_incident_rate_overall(): n_devices=COUNT(calendar.devices_sold), n_incidents=COUNT(calendar.incidents_reported), ) - .WHERE(n_devices > 0) + .WHERE(n_devices != 0) .CALCULATE( yr=year, cum_ir=ROUND( diff --git a/tests/test_sql_refsols/correl_35_sqlite.sql b/tests/test_sql_refsols/correl_35_sqlite.sql index 637cf039d..16cab7b6a 100644 --- a/tests/test_sql_refsols/correl_35_sqlite.sql +++ b/tests/test_sql_refsols/correl_35_sqlite.sql @@ -54,6 +54,6 @@ JOIN _t3 AS _t3 AND _t3.c_custkey = orders.o_custkey AND _t3.c_nationkey = supplier.s_nationkey AND _t3.o_orderpriority = orders.o_orderpriority - AND _t3.sum_n_rows > 0 + AND _t3.sum_n_rows <> 0 WHERE CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1998 diff --git a/tests/test_sql_refsols/defog_academic_gen11_ansi.sql b/tests/test_sql_refsols/defog_academic_gen11_ansi.sql index ecf4d7f72..34b47f018 100644 --- a/tests/test_sql_refsols/defog_academic_gen11_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen11_ansi.sql @@ -8,6 +8,6 @@ WITH _s0 AS ( FROM main.author ) SELECT - _s0.n_rows / CASE WHEN _s1.n_rows > 0 THEN _s1.n_rows ELSE NULL END AS publication_to_author_ratio + _s0.n_rows / NULLIF(_s1.n_rows, 0) AS publication_to_author_ratio FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_academic_gen11_mysql.sql 
b/tests/test_sql_refsols/defog_academic_gen11_mysql.sql index ecf4d7f72..34b47f018 100644 --- a/tests/test_sql_refsols/defog_academic_gen11_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen11_mysql.sql @@ -8,6 +8,6 @@ WITH _s0 AS ( FROM main.author ) SELECT - _s0.n_rows / CASE WHEN _s1.n_rows > 0 THEN _s1.n_rows ELSE NULL END AS publication_to_author_ratio + _s0.n_rows / NULLIF(_s1.n_rows, 0) AS publication_to_author_ratio FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_academic_gen11_postgres.sql b/tests/test_sql_refsols/defog_academic_gen11_postgres.sql index 723ebe9af..6f3332d6c 100644 --- a/tests/test_sql_refsols/defog_academic_gen11_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen11_postgres.sql @@ -8,6 +8,6 @@ WITH _s0 AS ( FROM main.author ) SELECT - CAST(_s0.n_rows AS DOUBLE PRECISION) / CASE WHEN _s1.n_rows > 0 THEN _s1.n_rows ELSE NULL END AS publication_to_author_ratio + CAST(_s0.n_rows AS DOUBLE PRECISION) / NULLIF(_s1.n_rows, 0) AS publication_to_author_ratio FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_academic_gen11_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen11_snowflake.sql index ecf4d7f72..34b47f018 100644 --- a/tests/test_sql_refsols/defog_academic_gen11_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen11_snowflake.sql @@ -8,6 +8,6 @@ WITH _s0 AS ( FROM main.author ) SELECT - _s0.n_rows / CASE WHEN _s1.n_rows > 0 THEN _s1.n_rows ELSE NULL END AS publication_to_author_ratio + _s0.n_rows / NULLIF(_s1.n_rows, 0) AS publication_to_author_ratio FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_academic_gen11_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen11_sqlite.sql index 33a3678ed..600c593d8 100644 --- a/tests/test_sql_refsols/defog_academic_gen11_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen11_sqlite.sql @@ -8,6 +8,6 @@ WITH _s0 AS ( FROM main.author ) SELECT - CAST(_s0.n_rows AS REAL) / 
CASE WHEN _s1.n_rows > 0 THEN _s1.n_rows ELSE NULL END AS publication_to_author_ratio + CAST(_s0.n_rows AS REAL) / NULLIF(_s1.n_rows, 0) AS publication_to_author_ratio FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_academic_gen12_ansi.sql b/tests/test_sql_refsols/defog_academic_gen12_ansi.sql index fc4698b0b..ae4550fd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen12_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen12_ansi.sql @@ -1,3 +1,3 @@ SELECT - SUM(NOT cid IS NULL) / CASE WHEN SUM(NOT jid IS NULL) > 0 THEN SUM(NOT jid IS NULL) ELSE NULL END AS ratio + SUM(NOT cid IS NULL) / NULLIF(SUM(NOT jid IS NULL), 0) AS ratio FROM main.publication diff --git a/tests/test_sql_refsols/defog_academic_gen12_mysql.sql b/tests/test_sql_refsols/defog_academic_gen12_mysql.sql index fc4698b0b..ae4550fd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen12_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen12_mysql.sql @@ -1,3 +1,3 @@ SELECT - SUM(NOT cid IS NULL) / CASE WHEN SUM(NOT jid IS NULL) > 0 THEN SUM(NOT jid IS NULL) ELSE NULL END AS ratio + SUM(NOT cid IS NULL) / NULLIF(SUM(NOT jid IS NULL), 0) AS ratio FROM main.publication diff --git a/tests/test_sql_refsols/defog_academic_gen12_postgres.sql b/tests/test_sql_refsols/defog_academic_gen12_postgres.sql index c10a33368..8625dcab8 100644 --- a/tests/test_sql_refsols/defog_academic_gen12_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen12_postgres.sql @@ -1,7 +1,3 @@ SELECT - CAST(SUM(CASE WHEN NOT cid IS NULL THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / CASE - WHEN SUM(CASE WHEN NOT jid IS NULL THEN 1 ELSE 0 END) > 0 - THEN SUM(CASE WHEN NOT jid IS NULL THEN 1 ELSE 0 END) - ELSE NULL - END AS ratio + CAST(SUM(CASE WHEN NOT cid IS NULL THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / NULLIF(SUM(CASE WHEN NOT jid IS NULL THEN 1 ELSE 0 END), 0) AS ratio FROM main.publication diff --git a/tests/test_sql_refsols/defog_academic_gen12_snowflake.sql 
b/tests/test_sql_refsols/defog_academic_gen12_snowflake.sql index 1de35fe7c..9552f439b 100644 --- a/tests/test_sql_refsols/defog_academic_gen12_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen12_snowflake.sql @@ -1,3 +1,3 @@ SELECT - COUNT_IF(NOT cid IS NULL) / CASE WHEN COUNT_IF(NOT jid IS NULL) > 0 THEN COUNT_IF(NOT jid IS NULL) ELSE NULL END AS ratio + COUNT_IF(NOT cid IS NULL) / NULLIF(COUNT_IF(NOT jid IS NULL), 0) AS ratio FROM main.publication diff --git a/tests/test_sql_refsols/defog_academic_gen12_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen12_sqlite.sql index 61b1549a7..42ac2b338 100644 --- a/tests/test_sql_refsols/defog_academic_gen12_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen12_sqlite.sql @@ -1,3 +1,3 @@ SELECT - CAST(SUM(NOT cid IS NULL) AS REAL) / CASE WHEN SUM(NOT jid IS NULL) > 0 THEN SUM(NOT jid IS NULL) ELSE NULL END AS ratio + CAST(SUM(NOT cid IS NULL) AS REAL) / NULLIF(SUM(NOT jid IS NULL), 0) AS ratio FROM main.publication diff --git a/tests/test_sql_refsols/defog_academic_gen13_ansi.sql b/tests/test_sql_refsols/defog_academic_gen13_ansi.sql index a15aac0b1..bdcb0a27f 100644 --- a/tests/test_sql_refsols/defog_academic_gen13_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen13_ansi.sql @@ -15,13 +15,7 @@ WITH _s1 AS ( ) SELECT domain.did AS domain_id, - COALESCE(_s1.n_rows, 0) / CASE - WHEN ( - NOT _s3.n_rows IS NULL AND _s3.n_rows > 0 - ) - THEN COALESCE(_s3.n_rows, 0) - ELSE NULL - END AS ratio + COALESCE(_s1.n_rows, 0) / NULLIF(COALESCE(_s3.n_rows, 0), 0) AS ratio FROM main.domain AS domain LEFT JOIN _s1 AS _s1 ON _s1.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen13_mysql.sql b/tests/test_sql_refsols/defog_academic_gen13_mysql.sql index a15aac0b1..bdcb0a27f 100644 --- a/tests/test_sql_refsols/defog_academic_gen13_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen13_mysql.sql @@ -15,13 +15,7 @@ WITH _s1 AS ( ) SELECT domain.did AS domain_id, - COALESCE(_s1.n_rows, 0) / 
CASE - WHEN ( - NOT _s3.n_rows IS NULL AND _s3.n_rows > 0 - ) - THEN COALESCE(_s3.n_rows, 0) - ELSE NULL - END AS ratio + COALESCE(_s1.n_rows, 0) / NULLIF(COALESCE(_s3.n_rows, 0), 0) AS ratio FROM main.domain AS domain LEFT JOIN _s1 AS _s1 ON _s1.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen13_postgres.sql b/tests/test_sql_refsols/defog_academic_gen13_postgres.sql index e2a713801..39f7ef9f2 100644 --- a/tests/test_sql_refsols/defog_academic_gen13_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen13_postgres.sql @@ -15,13 +15,7 @@ WITH _s1 AS ( ) SELECT domain.did AS domain_id, - CAST(COALESCE(_s1.n_rows, 0) AS DOUBLE PRECISION) / CASE - WHEN ( - NOT _s3.n_rows IS NULL AND _s3.n_rows > 0 - ) - THEN COALESCE(_s3.n_rows, 0) - ELSE NULL - END AS ratio + CAST(COALESCE(_s1.n_rows, 0) AS DOUBLE PRECISION) / NULLIF(COALESCE(_s3.n_rows, 0), 0) AS ratio FROM main.domain AS domain LEFT JOIN _s1 AS _s1 ON _s1.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen13_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen13_snowflake.sql index a15aac0b1..bdcb0a27f 100644 --- a/tests/test_sql_refsols/defog_academic_gen13_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen13_snowflake.sql @@ -15,13 +15,7 @@ WITH _s1 AS ( ) SELECT domain.did AS domain_id, - COALESCE(_s1.n_rows, 0) / CASE - WHEN ( - NOT _s3.n_rows IS NULL AND _s3.n_rows > 0 - ) - THEN COALESCE(_s3.n_rows, 0) - ELSE NULL - END AS ratio + COALESCE(_s1.n_rows, 0) / NULLIF(COALESCE(_s3.n_rows, 0), 0) AS ratio FROM main.domain AS domain LEFT JOIN _s1 AS _s1 ON _s1.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen13_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen13_sqlite.sql index 491db135f..7b31bd18b 100644 --- a/tests/test_sql_refsols/defog_academic_gen13_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen13_sqlite.sql @@ -15,13 +15,7 @@ WITH _s1 AS ( ) SELECT domain.did AS domain_id, - CAST(COALESCE(_s1.n_rows, 0) AS 
REAL) / CASE - WHEN ( - NOT _s3.n_rows IS NULL AND _s3.n_rows > 0 - ) - THEN COALESCE(_s3.n_rows, 0) - ELSE NULL - END AS ratio + CAST(COALESCE(_s1.n_rows, 0) AS REAL) / NULLIF(COALESCE(_s3.n_rows, 0), 0) AS ratio FROM main.domain AS domain LEFT JOIN _s1 AS _s1 ON _s1.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen14_ansi.sql b/tests/test_sql_refsols/defog_academic_gen14_ansi.sql index 3a5d0782e..f578e20e5 100644 --- a/tests/test_sql_refsols/defog_academic_gen14_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen14_ansi.sql @@ -2,7 +2,7 @@ SELECT year, COUNT(*) AS num_publications, COUNT(DISTINCT jid) AS num_journals, - COUNT(*) / CASE WHEN COUNT(DISTINCT jid) > 0 THEN COUNT(DISTINCT jid) ELSE NULL END AS ratio + COUNT(*) / NULLIF(COUNT(DISTINCT jid), 0) AS ratio FROM main.publication GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen14_mysql.sql b/tests/test_sql_refsols/defog_academic_gen14_mysql.sql index 3a5d0782e..f578e20e5 100644 --- a/tests/test_sql_refsols/defog_academic_gen14_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen14_mysql.sql @@ -2,7 +2,7 @@ SELECT year, COUNT(*) AS num_publications, COUNT(DISTINCT jid) AS num_journals, - COUNT(*) / CASE WHEN COUNT(DISTINCT jid) > 0 THEN COUNT(DISTINCT jid) ELSE NULL END AS ratio + COUNT(*) / NULLIF(COUNT(DISTINCT jid), 0) AS ratio FROM main.publication GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen14_postgres.sql b/tests/test_sql_refsols/defog_academic_gen14_postgres.sql index 553f06330..eecc51955 100644 --- a/tests/test_sql_refsols/defog_academic_gen14_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen14_postgres.sql @@ -2,7 +2,7 @@ SELECT year, COUNT(*) AS num_publications, COUNT(DISTINCT jid) AS num_journals, - CAST(COUNT(*) AS DOUBLE PRECISION) / CASE WHEN COUNT(DISTINCT jid) > 0 THEN COUNT(DISTINCT jid) ELSE NULL END AS ratio + CAST(COUNT(*) AS DOUBLE PRECISION) / NULLIF(COUNT(DISTINCT jid), 0) AS ratio FROM main.publication 
GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen14_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen14_snowflake.sql index 3a5d0782e..f578e20e5 100644 --- a/tests/test_sql_refsols/defog_academic_gen14_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen14_snowflake.sql @@ -2,7 +2,7 @@ SELECT year, COUNT(*) AS num_publications, COUNT(DISTINCT jid) AS num_journals, - COUNT(*) / CASE WHEN COUNT(DISTINCT jid) > 0 THEN COUNT(DISTINCT jid) ELSE NULL END AS ratio + COUNT(*) / NULLIF(COUNT(DISTINCT jid), 0) AS ratio FROM main.publication GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen14_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen14_sqlite.sql index 3113ffd43..0a2b57a4f 100644 --- a/tests/test_sql_refsols/defog_academic_gen14_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen14_sqlite.sql @@ -2,7 +2,7 @@ SELECT year, COUNT(*) AS num_publications, COUNT(DISTINCT jid) AS num_journals, - CAST(COUNT(*) AS REAL) / CASE WHEN COUNT(DISTINCT jid) > 0 THEN COUNT(DISTINCT jid) ELSE NULL END AS ratio + CAST(COUNT(*) AS REAL) / NULLIF(COUNT(DISTINCT jid), 0) AS ratio FROM main.publication GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_broker_adv8_ansi.sql b/tests/test_sql_refsols/defog_broker_adv8_ansi.sql index 666371daa..3d88d5356 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_ansi.sql @@ -1,5 +1,5 @@ SELECT - CASE WHEN COUNT(*) > 0 THEN COUNT(*) ELSE NULL END AS n_transactions, + NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction JOIN main.sbcustomer AS sbcustomer diff --git a/tests/test_sql_refsols/defog_broker_adv8_mysql.sql b/tests/test_sql_refsols/defog_broker_adv8_mysql.sql index d3c6c59dd..17fb88b4a 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_mysql.sql @@ -8,7 +8,7 @@ WITH _u_0 AS ( 1 ) 
SELECT - CASE WHEN COUNT(*) > 0 THEN COUNT(*) ELSE NULL END AS n_transactions, + NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbTransaction.sbtxamount), 0) AS total_amount FROM main.sbTransaction AS sbTransaction LEFT JOIN _u_0 AS _u_0 diff --git a/tests/test_sql_refsols/defog_broker_adv8_postgres.sql b/tests/test_sql_refsols/defog_broker_adv8_postgres.sql index 7d014559a..26080ce78 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_postgres.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_postgres.sql @@ -8,7 +8,7 @@ WITH _u_0 AS ( 1 ) SELECT - CASE WHEN COUNT(*) > 0 THEN COUNT(*) ELSE NULL END AS n_transactions, + NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction LEFT JOIN _u_0 AS _u_0 diff --git a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql index 50141aab4..37f29683c 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql @@ -8,7 +8,7 @@ WITH _u_0 AS ( 1 ) SELECT - CASE WHEN COUNT(*) > 0 THEN COUNT(*) ELSE NULL END AS n_transactions, + NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction LEFT JOIN _u_0 AS _u_0 diff --git a/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql index 959dad00c..b94aec102 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql @@ -8,7 +8,7 @@ WITH _u_0 AS ( 1 ) SELECT - CASE WHEN COUNT(*) > 0 THEN COUNT(*) ELSE NULL END AS n_transactions, + NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction LEFT JOIN _u_0 AS _u_0 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql 
b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql index a7f38d408..6a01e71f2 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_ansi.sql @@ -1,24 +1,9 @@ -WITH _s1 AS ( - SELECT - car_id, - sale_price - FROM main.sales - WHERE - sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), 30, DAY) -) SELECT - COUNT(_s1.car_id) AS num_sales, - CASE - WHEN ( - NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 - ) - THEN COALESCE(SUM(_s1.sale_price), 0) - ELSE NULL - END AS total_revenue + COUNT(*) AS num_sales, + CASE WHEN COUNT(*) <> 0 THEN COALESCE(SUM(sales.sale_price), 0) ELSE NULL END AS total_revenue FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id +JOIN main.sales AS sales + ON cars._id = sales.car_id + AND sales.sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), 30, DAY) WHERE LOWER(cars.make) LIKE '%toyota%' -GROUP BY - cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql index 71a7d9b41..db03b2dea 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_mysql.sql @@ -1,24 +1,9 @@ -WITH _s1 AS ( - SELECT - car_id, - sale_price - FROM main.sales - WHERE - sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '30' DAY) -) SELECT - COUNT(_s1.car_id) AS num_sales, - CASE - WHEN ( - NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 - ) - THEN COALESCE(SUM(_s1.sale_price), 0) - ELSE NULL - END AS total_revenue + COUNT(*) AS num_sales, + CASE WHEN COUNT(*) <> 0 THEN COALESCE(SUM(sales.sale_price), 0) ELSE NULL END AS total_revenue FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id +JOIN main.sales AS sales + ON cars._id = sales.car_id + AND sales.sale_date >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '30' DAY) WHERE LOWER(cars.make) LIKE '%toyota%' -GROUP BY - cars._id diff --git 
a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql index 72140b7e6..9ea071f87 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_postgres.sql @@ -1,24 +1,9 @@ -WITH _s1 AS ( - SELECT - car_id, - sale_price - FROM main.sales - WHERE - sale_date >= CURRENT_TIMESTAMP - INTERVAL '30 DAY' -) SELECT - COUNT(_s1.car_id) AS num_sales, - CASE - WHEN ( - NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 - ) - THEN COALESCE(SUM(_s1.sale_price), 0) - ELSE NULL - END AS total_revenue + COUNT(*) AS num_sales, + CASE WHEN COUNT(*) <> 0 THEN COALESCE(SUM(sales.sale_price), 0) ELSE NULL END AS total_revenue FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id +JOIN main.sales AS sales + ON cars._id = sales.car_id + AND sales.sale_date >= CURRENT_TIMESTAMP - INTERVAL '30 DAY' WHERE LOWER(cars.make) LIKE '%toyota%' -GROUP BY - cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql index 27cf9deaf..3239142db 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql @@ -1,24 +1,9 @@ -WITH _s1 AS ( - SELECT - car_id, - sale_price - FROM main.sales - WHERE - sale_date >= DATEADD(DAY, -30, CURRENT_TIMESTAMP()) -) SELECT - COUNT(_s1.car_id) AS num_sales, - CASE - WHEN ( - NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 - ) - THEN COALESCE(SUM(_s1.sale_price), 0) - ELSE NULL - END AS total_revenue + COUNT(*) AS num_sales, + CASE WHEN COUNT(*) <> 0 THEN COALESCE(SUM(sales.sale_price), 0) ELSE NULL END AS total_revenue FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id +JOIN main.sales AS sales + ON cars._id = sales.car_id + AND sales.sale_date >= DATEADD(DAY, -30, CURRENT_TIMESTAMP()) WHERE 
CONTAINS(LOWER(cars.make), 'toyota') -GROUP BY - cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql index 90d404adf..8847c48ad 100644 --- a/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv4_sqlite.sql @@ -1,24 +1,8 @@ -WITH _s1 AS ( - SELECT - car_id, - sale_price - FROM main.sales - WHERE - sale_date >= DATETIME('now', '-30 day') -) SELECT - COUNT(_s1.car_id) AS num_sales, - CASE - WHEN ( - NOT NULLIF(COUNT(_s1.car_id), 0) IS NULL AND NULLIF(COUNT(_s1.car_id), 0) > 0 - ) - THEN COALESCE(SUM(_s1.sale_price), 0) - ELSE NULL - END AS total_revenue + COUNT(*) AS num_sales, + CASE WHEN COUNT(*) <> 0 THEN COALESCE(SUM(sales.sale_price), 0) ELSE NULL END AS total_revenue FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id +JOIN main.sales AS sales + ON cars._id = sales.car_id AND sales.sale_date >= DATETIME('now', '-30 day') WHERE LOWER(cars.make) LIKE '%toyota%' -GROUP BY - cars._id diff --git a/tests/test_sql_refsols/defog_ewallet_adv4_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv4_ansi.sql index b8077b272..2d7f76170 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv4_ansi.sql @@ -1,7 +1,7 @@ SELECT COUNT(*) AS num_transactions, CASE - WHEN COUNT(*) > 0 + WHEN COUNT(*) <> 0 THEN COALESCE(SUM(wallet_transactions_daily.amount), 0) ELSE NULL END AS total_amount diff --git a/tests/test_sql_refsols/defog_ewallet_adv4_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv4_mysql.sql index 7a2285708..432086c72 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv4_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv4_mysql.sql @@ -1,7 +1,7 @@ SELECT COUNT(*) AS num_transactions, CASE - WHEN COUNT(*) > 0 + WHEN COUNT(*) <> 0 THEN COALESCE(SUM(wallet_transactions_daily.amount), 0) ELSE NULL END AS total_amount diff --git 
a/tests/test_sql_refsols/defog_ewallet_adv4_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv4_postgres.sql index c1dfcc764..ab41f723f 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv4_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv4_postgres.sql @@ -1,7 +1,7 @@ SELECT COUNT(*) AS num_transactions, CASE - WHEN COUNT(*) > 0 + WHEN COUNT(*) <> 0 THEN COALESCE(SUM(wallet_transactions_daily.amount), 0) ELSE NULL END AS total_amount diff --git a/tests/test_sql_refsols/defog_ewallet_adv4_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv4_snowflake.sql index b6ab12c52..51d6034ea 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv4_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv4_snowflake.sql @@ -1,7 +1,7 @@ SELECT COUNT(*) AS num_transactions, CASE - WHEN COUNT(*) > 0 + WHEN COUNT(*) <> 0 THEN COALESCE(SUM(wallet_transactions_daily.amount), 0) ELSE NULL END AS total_amount diff --git a/tests/test_sql_refsols/defog_ewallet_adv4_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv4_sqlite.sql index 4a5b8a56b..25d13401e 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv4_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv4_sqlite.sql @@ -1,7 +1,7 @@ SELECT COUNT(*) AS num_transactions, CASE - WHEN COUNT(*) > 0 + WHEN COUNT(*) <> 0 THEN COALESCE(SUM(wallet_transactions_daily.amount), 0) ELSE NULL END AS total_amount diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index 9519f26b9..fe8b9ea5a 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -1,9 +1,6 @@ SELECT ANY_VALUE(coupons.code) AS coupon_code, - COALESCE( - CASE WHEN COUNT(*) <> 0 THEN COUNT(wallet_transactions_daily.txid) ELSE NULL END, - 0 - ) AS redemption_count, + COUNT(wallet_transactions_daily.txid) AS redemption_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS 
coupons LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql b/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql index 9519f26b9..fe8b9ea5a 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_mysql.sql @@ -1,9 +1,6 @@ SELECT ANY_VALUE(coupons.code) AS coupon_code, - COALESCE( - CASE WHEN COUNT(*) <> 0 THEN COUNT(wallet_transactions_daily.txid) ELSE NULL END, - 0 - ) AS redemption_count, + COUNT(wallet_transactions_daily.txid) AS redemption_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql b/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql index 456e768b8..a2ac158c4 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_postgres.sql @@ -1,9 +1,6 @@ SELECT MAX(coupons.code) AS coupon_code, - COALESCE( - CASE WHEN COUNT(*) <> 0 THEN COUNT(wallet_transactions_daily.txid) ELSE NULL END, - 0 - ) AS redemption_count, + COUNT(wallet_transactions_daily.txid) AS redemption_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql index 2d38be644..f2308b204 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql @@ -1,9 +1,6 @@ SELECT ANY_VALUE(coupons.code) AS coupon_code, - COALESCE( - CASE WHEN COUNT(*) <> 0 THEN COUNT(wallet_transactions_daily.txid) ELSE NULL END, - 0 - ) AS redemption_count, + COUNT(wallet_transactions_daily.txid) AS 
redemption_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index 0142da0d8..c40521c08 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -1,9 +1,6 @@ SELECT MAX(coupons.code) AS coupon_code, - COALESCE( - CASE WHEN COUNT(*) <> 0 THEN COUNT(wallet_transactions_daily.txid) ELSE NULL END, - 0 - ) AS redemption_count, + COUNT(wallet_transactions_daily.txid) AS redemption_count, COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount FROM main.coupons AS coupons LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql index ea57003fc..bec8c788b 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql @@ -59,6 +59,6 @@ SELECT COALESCE(sum_n_rows, 0) AS incidents FROM _t1 WHERE - NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 + NOT sum_expr_3 IS NULL AND sum_expr_3 <> 0 ORDER BY 1 diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_mysql.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_mysql.sql index 91ff1b10e..4c248c546 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_mysql.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_mysql.sql @@ -59,6 +59,6 @@ SELECT COALESCE(sum_n_rows, 0) AS incidents FROM _t1 WHERE - NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 + NOT sum_expr_3 IS NULL AND sum_expr_3 
<> 0 ORDER BY 1 diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_postgres.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_postgres.sql index d2d15d49b..03f872de7 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_postgres.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_postgres.sql @@ -59,6 +59,6 @@ SELECT COALESCE(sum_n_rows, 0) AS incidents FROM _t1 WHERE - NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 + NOT sum_expr_3 IS NULL AND sum_expr_3 <> 0 ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_snowflake.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_snowflake.sql index 14aa8b03c..4c5b0ee35 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_snowflake.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_snowflake.sql @@ -59,6 +59,6 @@ SELECT COALESCE(sum_n_rows, 0) AS incidents FROM _t1 WHERE - NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 + NOT sum_expr_3 IS NULL AND sum_expr_3 <> 0 ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql index e7079307e..256921e47 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql @@ -59,6 +59,6 @@ SELECT COALESCE(sum_n_rows, 0) AS incidents FROM _t1 WHERE - NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 + NOT sum_expr_3 IS NULL AND sum_expr_3 <> 0 ORDER BY 1 From 08422675efb4cfd890f804e1e1a6cab8e791b0c8 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 23:04:52 -0400 Subject: [PATCH 123/143] Rewriting SUM(NULLIF(x, 0)) to SUM(x) [RUN CI] --- 
pydough/sqlglot/override_simplify.py | 27 +++++++++++++++++++ .../defog_academic_gen15_ansi.sql | 2 +- .../defog_academic_gen15_mysql.sql | 2 +- .../defog_academic_gen15_postgres.sql | 2 +- .../defog_academic_gen15_snowflake.sql | 2 +- .../defog_academic_gen15_sqlite.sql | 2 +- .../defog_dealership_adv3_ansi.sql | 2 +- .../defog_dealership_adv3_mysql.sql | 2 +- .../defog_dealership_adv3_postgres.sql | 2 +- .../defog_dealership_adv3_snowflake.sql | 2 +- .../defog_dealership_adv3_sqlite.sql | 2 +- ...raph_country_combination_analysis_ansi.sql | 2 +- ...aph_country_combination_analysis_mysql.sql | 2 +- ..._country_combination_analysis_postgres.sql | 2 +- ...country_combination_analysis_snowflake.sql | 2 +- ...ph_country_combination_analysis_sqlite.sql | 2 +- ...ph_country_incident_rate_analysis_ansi.sql | 2 +- ...h_country_incident_rate_analysis_mysql.sql | 2 +- ...ountry_incident_rate_analysis_postgres.sql | 2 +- ...untry_incident_rate_analysis_snowflake.sql | 2 +- ..._country_incident_rate_analysis_sqlite.sql | 2 +- ...r_rate_sun_set_by_factory_country_ansi.sql | 2 +- ..._rate_sun_set_by_factory_country_mysql.sql | 2 +- ...te_sun_set_by_factory_country_postgres.sql | 2 +- ...e_sun_set_by_factory_country_snowflake.sql | 2 +- ...rate_sun_set_by_factory_country_sqlite.sql | 2 +- 26 files changed, 52 insertions(+), 25 deletions(-) diff --git a/pydough/sqlglot/override_simplify.py b/pydough/sqlglot/override_simplify.py index 53ee4cc12..73509630b 100644 --- a/pydough/sqlglot/override_simplify.py +++ b/pydough/sqlglot/override_simplify.py @@ -128,6 +128,7 @@ def _simplify(expression, root=True): # PyDough Change: new pre-order transformations node = rewrite_case_to_nullif(node) node = rewrite_coalesce_nullif(node) + node = rewrite_sum_nullif(node) if constant_propagation: node = propagate_constants(node, root) @@ -312,3 +313,29 @@ def rewrite_coalesce_nullif(expr: exp.Expression) -> exp.Expression: default=lhs, copy=False, ) + + +def rewrite_sum_nullif(expr: 
exp.Expression) -> exp.Expression: + """ + Rewrite `SUM(NULLIF(x, 0))` to `SUM(x)`. + + Args: + `expr`: The expression to rewrite. + + Returns: + The rewritten expression. + """ + if not isinstance(expr, exp.Sum): + return expr + + arg = expr.this + if not isinstance(arg, exp.Nullif): + return expr + + lhs = arg.args.get("this") + rhs = arg.args.get("expression") + + if isinstance(rhs, exp.Literal) and rhs.is_number and float(rhs.this) == 0: + return exp.Sum(this=lhs, copy=False) + + return expr diff --git a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql index f4bf02ae7..6aff22630 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_ansi.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - COALESCE(SUM(NULLIF(count_oid, 0)), 0) / COUNT(*) AS ratio + COALESCE(SUM(count_oid), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql index f4bf02ae7..6aff22630 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_mysql.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - COALESCE(SUM(NULLIF(count_oid, 0)), 0) / COUNT(*) AS ratio + COALESCE(SUM(count_oid), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql index 871e47887..f32c244df 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_postgres.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - CAST(COALESCE(SUM(NULLIF(count_oid, 0)), 0) AS DOUBLE PRECISION) / COUNT(*) AS ratio + CAST(COALESCE(SUM(count_oid), 0) AS DOUBLE PRECISION) / COUNT(*) AS ratio FROM _t1 
GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql index 38ba10766..976e19ef5 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_snowflake.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - COALESCE(SUM(NULLIF(count_oid, 0)), 0) / COUNT(*) AS ratio + COALESCE(SUM(count_oid), 0) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql index dad2b9f3e..e7a243354 100644 --- a/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen15_sqlite.sql @@ -10,7 +10,7 @@ WITH _t1 AS ( ) SELECT anything_continent AS continent, - CAST(COALESCE(SUM(NULLIF(count_oid, 0)), 0) AS REAL) / COUNT(*) AS ratio + CAST(COALESCE(SUM(count_oid), 0) AS REAL) / COUNT(*) AS ratio FROM _t1 GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql index 30a2cb178..a14e5f434 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_ansi.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(NULLIF(count_car_id, 0)), 0) AS num_sales + COALESCE(SUM(count_car_id), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql index 30a2cb178..a14e5f434 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_mysql.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(NULLIF(count_car_id, 0)), 0) AS num_sales + COALESCE(SUM(count_car_id), 0) AS num_sales FROM _t1 GROUP BY 
1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql index 50a39be6d..7fc3cc4b3 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_postgres.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(NULLIF(count_car_id, 0)), 0) AS num_sales + COALESCE(SUM(count_car_id), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql index f678569a6..2c6607199 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(NULLIF(count_car_id, 0)), 0) AS num_sales + COALESCE(SUM(count_car_id), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql index 50a39be6d..7fc3cc4b3 100644 --- a/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv3_sqlite.sql @@ -14,7 +14,7 @@ WITH _t1 AS ( SELECT anything_make AS make, anything_model AS model, - COALESCE(SUM(NULLIF(count_car_id, 0)), 0) AS num_sales + COALESCE(SUM(count_car_id), 0) AS num_sales FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql index 20c766e89..f7eb33c2f 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows 
+ SUM(count_in_device_id) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql index bbc0e0461..4da22fe5e 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_mysql.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql index 90a80c8b1..eb48d5a1a 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_postgres.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql index 600d1f4f6..7d0b311b7 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql index 8532a5ea6..3818c4c74 100644 --- 
a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql @@ -26,7 +26,7 @@ WITH _s0 AS ( anything__id_3, anything_co_id, COUNT(*) AS n_rows, - SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_n_rows FROM _t1 GROUP BY 1, diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql index 10925d008..9e9dea053 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql index ec9aac85b..c95eb1d4c 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_mysql.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql index 1b7347723..c6b655158 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_postgres.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows + 
SUM(count_in_device_id) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql index 285263516..7aa3fc849 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql index 8f5994503..390e36aa8 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql @@ -51,7 +51,7 @@ WITH _t2 AS ( SELECT anything_us_country_id, COUNT(*) AS n_rows, - SUM(NULLIF(count_in_device_id, 0)) AS sum_n_rows + SUM(count_in_device_id) AS sum_n_rows FROM _t5 GROUP BY 1 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql index b7b08d902..44211d2ed 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(NULLIF(count_in_device_id, 0)), 0) AS sum_n_incidents, + COALESCE(SUM(count_in_device_id), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql 
b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql index 5aac2d9f9..e9909a422 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_mysql.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( DEVICES.de_id ), _s5 AS ( SELECT - COALESCE(SUM(NULLIF(count_in_device_id, 0)), 0) AS sum_n_incidents, + COALESCE(SUM(count_in_device_id), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql index adc28a5fd..f4f1602c9 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_postgres.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(NULLIF(count_in_device_id, 0)), 0) AS sum_n_incidents, + COALESCE(SUM(count_in_device_id), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql index b68ca2880..70ee512ec 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(NULLIF(count_in_device_id, 0)), 0) AS sum_n_incidents, + COALESCE(SUM(count_in_device_id), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql 
b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql index c5d444f5a..53eab184f 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql @@ -11,7 +11,7 @@ WITH _t2 AS ( devices.de_id ), _s5 AS ( SELECT - COALESCE(SUM(NULLIF(count_in_device_id, 0)), 0) AS sum_n_incidents, + COALESCE(SUM(count_in_device_id), 0) AS sum_n_incidents, anything_de_production_country_id, COUNT(*) AS n_rows FROM _t2 From 115a926279537d14a4b9392fa1d69fd3af6898ac Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 23:32:23 -0400 Subject: [PATCH 124/143] Adding pullup before re-doing aggregation removal [RUN CI] --- pydough/conversion/relational_converter.py | 6 ++-- tests/test_plan_refsols/common_prefix_ag.txt | 4 +-- tests/test_plan_refsols/common_prefix_ah.txt | 2 +- tests/test_plan_refsols/common_prefix_ai.txt | 2 +- tests/test_plan_refsols/common_prefix_aj.txt | 4 +-- tests/test_plan_refsols/common_prefix_ak.txt | 4 +-- tests/test_plan_refsols/common_prefix_an.txt | 2 +- tests/test_plan_refsols/common_prefix_ao.txt | 2 +- tests/test_plan_refsols/common_prefix_u.txt | 2 +- tests/test_plan_refsols/common_prefix_x.txt | 2 +- tests/test_plan_refsols/common_prefix_y.txt | 4 +-- tests/test_plan_refsols/correl_13.txt | 19 ++++++------ .../customer_largest_order_deltas.txt | 2 +- .../epoch_intra_season_searches.txt | 2 +- .../quantile_function_test_2.txt | 2 +- .../quantile_function_test_3.txt | 2 +- .../quantile_function_test_4.txt | 2 +- tests/test_plan_refsols/singular7.txt | 2 +- .../supplier_pct_national_qty.txt | 2 +- ...chnograph_country_combination_analysis.txt | 2 +- ...nograph_country_incident_rate_analysis.txt | 2 +- ..._error_rate_sun_set_by_factory_country.txt | 2 +- tests/test_plan_refsols/tpch_q21.txt | 2 +- .../window_filter_order_1.txt | 2 +- .../window_filter_order_2.txt | 2 +- 
.../window_filter_order_3.txt | 2 +- .../window_filter_order_8.txt | 2 +- tests/test_sql_refsols/correl_13_sqlite.sql | 29 ++++++++----------- 28 files changed, 54 insertions(+), 58 deletions(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 75f9976d2..0d7d2e06d 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1591,8 +1591,9 @@ def optimize_relational_tree( # B: expression simplification # C: filter pushdown # D: join-aggregate transpose - # E: redundant aggregation removal - # F: column pruning + # E: projection pullup again + # F: redundant aggregation removal + # G: column pruning # This is done because pullup will create more opportunities for expression # simplification, which will allow more filters to be pushed further down, # and the combination of those together will create more opportunities for @@ -1603,6 +1604,7 @@ def optimize_relational_tree( simplify_expressions(root, session, additional_shuttles) root = confirm_root(push_filters(root, session)) root = confirm_root(pull_aggregates_above_joins(root)) + root = confirm_root(pullup_projections(root)) root = remove_redundant_aggs(root) root = pruner.prune_unused_columns(root) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index 2e983d6d3..06810301d 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) FILTER(condition=sum_n_rows != 0:numeric & sum_sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 
'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) @@ -11,7 +11,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 
'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey != 0:numeric), 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index c12b30057..266aea028 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) 
FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_o_orderkey, count_o_orderkey != 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey_0}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt 
b/tests/test_plan_refsols/common_prefix_ai.txt index ad071af12..9ba0f521a 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) - AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + AGGREGATE(keys={'c_custkey': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index dc94f8c6d..c2046982c 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) FILTER(condition=sum_n_rows != 0:numeric & sum_sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) @@ -11,7 +11,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey != 0:numeric), 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index bd19864d4..024eece8f 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows)], orderings=[(anything_anything_n_name):asc_first]) FILTER(condition=sum_n_rows != 0:numeric & sum_sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'c_custkey_0': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'c_custkey': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) @@ -11,7 +11,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'n_rows': KEEP_IF(count_o_orderkey, count_o_orderkey != 0:numeric)}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey_0': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey_0}, aggregations={'count_o_orderkey': COUNT(o_orderkey)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderkey_0': t0.o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 0ba6c7410..e4dcc0ca1 100644 --- 
a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -11,7 +11,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(KEEP_IF(count_any FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric)}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) + AGGREGATE(keys={'l_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 48ac17aa7..d8f0bcb8c 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -12,7 +12,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(anything_n_rows, SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric))}) 
- AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) + AGGREGATE(keys={'l_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index a5309f7f7..f4fc2e1e8 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -7,7 +7,7 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:n SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric)), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey), 'sum_l_quantity': SUM(l_quantity)}) + AGGREGATE(keys={'l_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey), 'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': 
t1.l_orderkey, 'l_quantity': t1.l_quantity, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index 573e4103b..bd980d6e2 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -3,7 +3,7 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_o_custkey': anything_o_custkey, 'n_rows': n_rows}) AGGREGATE(keys={'anything_o_custkey': anything_o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric))}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) + AGGREGATE(keys={'l_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index 9e388ef51..440747c1c 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,10 +1,10 @@ ROOT(columns=[('name', anything_c_name), ('n_orders', 
DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric))], orderings=[(DEFAULT_TO(KEEP_IF(count_anything_o_custkey, count_anything_o_custkey != 0:numeric), 0:numeric)):desc_last, (anything_c_name):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'anything_c_name': anything_c_name, 'count_anything_o_custkey': count_anything_o_custkey}) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'o_custkey': c_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'count_anything_o_custkey': COUNT(anything_o_custkey), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.anything_o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_custkey': t1.anything_o_custkey, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) PROJECT(columns={'anything_o_custkey': anything_o_custkey, 'n_rows': KEEP_IF(count_l_orderkey, count_l_orderkey != 0:numeric)}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) + AGGREGATE(keys={'l_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'count_l_orderkey': COUNT(l_orderkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git 
a/tests/test_plan_refsols/correl_13.txt b/tests/test_plan_refsols/correl_13.txt index 781307ab7..bbc2f4c36 100644 --- a/tests/test_plan_refsols/correl_13.txt +++ b/tests/test_plan_refsols/correl_13.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t1.ps_suppkey}) - FILTER(condition=s_nationkey <= 3:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) +ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t1.ps_suppkey}) + FILTER(condition=s_nationkey <= 3:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + 
SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index d9e8965b6..1c1fd82c3 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -5,7 +5,7 @@ ROOT(columns=[('name', anything_c_name), ('largest_diff', IFF(ABS(min_revenue_de FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[anything_o_custkey], order=[(anything_o_orderdate):asc_last])), columns={'anything_o_custkey': anything_o_custkey, 'anything_o_orderdate': anything_o_orderdate, 'sum_r': sum_r}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'anything_o_orderdate': ANYTHING(o_orderdate), 'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) + AGGREGATE(keys={'l_orderkey': o_orderkey}, aggregations={'anything_o_custkey': ANYTHING(o_custkey), 'anything_o_orderdate': ANYTHING(o_orderdate), 'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': 
o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 343d44793..81911c559 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,7 +1,7 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(KEEP_IF(count_search_id, count_search_id != 0:numeric), 0:numeric) > 0:numeric)}) - AGGREGATE(keys={'s_name': s_name, 'search_id_0': search_id_0}, aggregations={'count_search_id': COUNT(search_id)}) + AGGREGATE(keys={'s_name': s_name, 'search_id': search_id_0}, aggregations={'count_search_id': COUNT(search_id)}) JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_id_0': t0.search_id}) JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, 
columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/quantile_function_test_2.txt b/tests/test_plan_refsols/quantile_function_test_2.txt index 7b87e14ce..d1881c827 100644 --- a/tests/test_plan_refsols/quantile_function_test_2.txt +++ b/tests/test_plan_refsols/quantile_function_test_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', anything_r_name), ('nation_name', anything_n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) + AGGREGATE(keys={'c_nationkey': n_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_totalprice': t1.o_totalprice, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) diff --git a/tests/test_plan_refsols/quantile_function_test_3.txt b/tests/test_plan_refsols/quantile_function_test_3.txt index 7b87e14ce..d1881c827 100644 --- a/tests/test_plan_refsols/quantile_function_test_3.txt +++ b/tests/test_plan_refsols/quantile_function_test_3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', anything_r_name), ('nation_name', anything_n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) + AGGREGATE(keys={'c_nationkey': n_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': 
QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_totalprice': t1.o_totalprice, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) diff --git a/tests/test_plan_refsols/quantile_function_test_4.txt b/tests/test_plan_refsols/quantile_function_test_4.txt index dbcd07ede..1edc5f3ac 100644 --- a/tests/test_plan_refsols/quantile_function_test_4.txt +++ b/tests/test_plan_refsols/quantile_function_test_4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', anything_r_name), ('nation_name', anything_n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': 
ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) + AGGREGATE(keys={'c_nationkey': n_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name)}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_totalprice': t1.o_totalprice, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 2e54bea89..8251cf845 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -4,7 +4,7 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_ord SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) PROJECT(columns={'anything_p_name': anything_p_name, 'n_orders': DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey != 0:numeric), 0:numeric), 'ps_suppkey': ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey != 0:numeric), 0:numeric)):desc_first, (anything_p_name):asc_last]) == 1:numeric, 
columns={'anything_p_name': anything_p_name, 'count_l_suppkey': count_l_suppkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'count_l_suppkey': COUNT(l_suppkey)}) + AGGREGATE(keys={'l_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'count_l_suppkey': COUNT(l_suppkey)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index 9cb1bede4..d3a24fe65 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,5 +1,5 @@ ROOT(columns=[('supplier_name', anything_s_name), ('nation_name', anything_n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[anything_s_nationkey], order=[]))], orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[anything_s_nationkey], order=[])):desc_last], limit=5:numeric) - AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_s_name': ANYTHING(s_name), 'anything_s_nationkey': ANYTHING(s_nationkey), 
'sum_l_quantity': SUM(l_quantity)}) + AGGREGATE(keys={'l_suppkey': s_suppkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_s_name': ANYTHING(s_name), 'anything_s_nationkey': ANYTHING(s_nationkey), 'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index 5d9a3cfbd..915f73bcc 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -4,7 +4,7 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id != 0:numeric))}) - AGGREGATE(keys={'de_id': de_id}, aggregations={'anything__id_3': ANYTHING(_id_3), 'anything_co_id': ANYTHING(co_id), 'count_in_device_id': COUNT(in_device_id)}) + AGGREGATE(keys={'in_device_id': de_id}, aggregations={'anything__id_3': ANYTHING(_id_3), 'anything_co_id': ANYTHING(co_id), 'count_in_device_id': COUNT(in_device_id)}) 
JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t0.de_id, 'in_device_id': t1.in_device_id}) JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index 16b5206d0..f72a0e8b1 100644 --- a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -14,7 +14,7 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) AGGREGATE(keys={'anything_us_country_id': anything_us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id != 0:numeric))}) - AGGREGATE(keys={'de_id': de_id}, aggregations={'anything_us_country_id': ANYTHING(us_country_id), 'count_in_device_id': COUNT(in_device_id)}) + AGGREGATE(keys={'in_device_id': de_id}, aggregations={'anything_us_country_id': ANYTHING(us_country_id), 'count_in_device_id': COUNT(in_device_id)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'in_device_id': t1.in_device_id, 'us_country_id': t0.us_country_id}) JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index a594887a4..4cbd2dc77 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -3,7 +3,7 @@ ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:n SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) PROJECT(columns={'anything_de_production_country_id': anything_de_production_country_id, 'n_rows': n_rows, 'sum_n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric)}) AGGREGATE(keys={'anything_de_production_country_id': anything_de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id != 0:numeric))}) - AGGREGATE(keys={'de_id': de_id}, aggregations={'anything_de_production_country_id': ANYTHING(de_production_country_id), 'count_in_device_id': COUNT(in_device_id)}) + AGGREGATE(keys={'in_device_id': de_id}, aggregations={'anything_de_production_country_id': ANYTHING(de_production_country_id), 'count_in_device_id': COUNT(in_device_id)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id, 'in_device_id': t1.in_device_id}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': 
de_production_country_id}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index d573d9ff5..0d8a6953e 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,5 +1,5 @@ ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', DEFAULT_TO(KEEP_IF(count_anything_l_suppkey, count_anything_l_suppkey != 0:numeric), 0:numeric))], orderings=[(DEFAULT_TO(KEEP_IF(count_anything_l_suppkey, count_anything_l_suppkey != 0:numeric), 0:numeric)):desc_last, (anything_s_name):asc_first], limit=10:numeric) - AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'count_anything_l_suppkey': COUNT(anything_l_suppkey)}) + AGGREGATE(keys={'anything_l_suppkey': s_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'count_anything_l_suppkey': COUNT(anything_l_suppkey)}) JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t1.anything_l_suppkey, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/window_filter_order_1.txt b/tests/test_plan_refsols/window_filter_order_1.txt index 21d4fcb51..fb1fe6e66 100644 --- a/tests/test_plan_refsols/window_filter_order_1.txt +++ b/tests/test_plan_refsols/window_filter_order_1.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) FILTER(condition=KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric) != 0:numeric & DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, 
count_o_custkey != 0:numeric), 0:numeric)], partition=[], order=[]), columns={}) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) + AGGREGATE(keys={'o_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_2.txt b/tests/test_plan_refsols/window_filter_order_2.txt index 21d4fcb51..fb1fe6e66 100644 --- a/tests/test_plan_refsols/window_filter_order_2.txt +++ b/tests/test_plan_refsols/window_filter_order_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) FILTER(condition=KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric) != 0:numeric & DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric)], partition=[], order=[]), columns={}) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) + AGGREGATE(keys={'o_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) 
diff --git a/tests/test_plan_refsols/window_filter_order_3.txt b/tests/test_plan_refsols/window_filter_order_3.txt index 21d4fcb51..fb1fe6e66 100644 --- a/tests/test_plan_refsols/window_filter_order_3.txt +++ b/tests/test_plan_refsols/window_filter_order_3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) FILTER(condition=KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric) != 0:numeric & DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric) < RELAVG(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric)], partition=[], order=[]), columns={}) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) + AGGREGATE(keys={'o_custkey': c_custkey}, aggregations={'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt index 254011631..b0d2c5307 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) FILTER(condition=ABSENT(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric)) & anything_c_acctbal < RELSUM(args=[DEFAULT_TO(KEEP_IF(count_o_custkey, count_o_custkey != 0:numeric), 0:numeric)], partition=[], order=[]), columns={}) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': 
ANYTHING(c_acctbal), 'count_o_custkey': COUNT(o_custkey)}) + AGGREGATE(keys={'o_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'count_o_custkey': COUNT(o_custkey)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'o_custkey': t1.o_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_sql_refsols/correl_13_sqlite.sql b/tests/test_sql_refsols/correl_13_sqlite.sql index defc86a28..fb6428421 100644 --- a/tests/test_sql_refsols/correl_13_sqlite.sql +++ b/tests/test_sql_refsols/correl_13_sqlite.sql @@ -1,18 +1,13 @@ -WITH _t0 AS ( - SELECT DISTINCT - partsupp.ps_suppkey - FROM tpch.supplier AS supplier - JOIN tpch.partsupp AS partsupp - ON partsupp.ps_suppkey = supplier.s_suppkey - JOIN tpch.part AS part - ON part.p_container LIKE 'SM%' - AND part.p_partkey = partsupp.ps_partkey - AND part.p_retailprice < ( - partsupp.ps_supplycost * 1.5 - ) - WHERE - supplier.s_nationkey <= 3 -) SELECT - COUNT(*) AS n -FROM _t0 + COUNT(DISTINCT partsupp.ps_suppkey) AS n +FROM tpch.supplier AS supplier +JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey +JOIN tpch.part AS part + ON part.p_container LIKE 'SM%' + AND part.p_partkey = partsupp.ps_partkey + AND part.p_retailprice < ( + partsupp.ps_supplycost * 1.5 + ) +WHERE + supplier.s_nationkey <= 3 From 1d24b158eba4901911c8223d69ea68b96b767d4c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 22 Oct 2025 23:32:51 -0400 Subject: [PATCH 125/143] [RUN CI] --- pydough/conversion/relational_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 0d7d2e06d..d886bb3e9 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1586,7 +1586,7 @@ def optimize_relational_tree( # Re-run column bubbling now that the columns have been pruned again. root = bubble_column_names(root) - # Run the following pipeline three times: + # Run the following pipeline twice: # A: projection pullup # B: expression simplification # C: filter pushdown From 0f9f0988ddffc8cacb682d9397d9bf8c68d43f3a Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 30 Oct 2025 00:55:19 -0400 Subject: [PATCH 126/143] Initial commit: tested prototype implementation to optimize queries via middle table removal --- pydough/conversion/join_key_substitution.py | 96 +++++++++++++++++++ pydough/conversion/relational_converter.py | 3 + .../epoch_unique_users_per_engine.txt | 12 +-- tests/test_plan_refsols/tpch_q21.txt | 10 +- .../defog_academic_gen1_ansi.sql | 4 +- .../defog_academic_gen1_mysql.sql | 4 +- .../defog_academic_gen1_postgres.sql | 4 +- .../defog_academic_gen1_snowflake.sql | 4 +- .../defog_academic_gen1_sqlite.sql | 4 +- .../defog_academic_gen21_ansi.sql | 4 +- .../defog_academic_gen21_mysql.sql | 4 +- .../defog_academic_gen21_postgres.sql | 4 +- .../defog_academic_gen21_snowflake.sql | 4 +- .../defog_academic_gen21_sqlite.sql | 4 +- .../defog_academic_gen22_ansi.sql | 8 +- .../defog_academic_gen22_mysql.sql | 8 +- .../defog_academic_gen22_postgres.sql | 8 +- .../defog_academic_gen22_snowflake.sql | 8 +- .../defog_academic_gen22_sqlite.sql | 8 +- .../defog_academic_gen24_ansi.sql | 10 +- .../defog_academic_gen24_mysql.sql | 4 +- .../defog_academic_gen24_postgres.sql | 4 +- .../defog_academic_gen24_snowflake.sql | 4 +- .../defog_academic_gen24_sqlite.sql | 4 +- .../defog_academic_gen25_ansi.sql | 4 +- .../defog_academic_gen25_mysql.sql | 4 +- .../defog_academic_gen25_postgres.sql | 4 +- 
.../defog_academic_gen25_snowflake.sql | 4 +- .../defog_academic_gen25_sqlite.sql | 4 +- .../epoch_unique_users_per_engine_ansi.sql | 26 +++-- .../epoch_unique_users_per_engine_mysql.sql | 26 +++-- ...epoch_unique_users_per_engine_postgres.sql | 26 +++-- ...poch_unique_users_per_engine_snowflake.sql | 26 +++-- .../epoch_unique_users_per_engine_sqlite.sql | 26 +++-- tests/test_sql_refsols/tpch_q21_ansi.sql | 30 +++--- tests/test_sql_refsols/tpch_q21_mysql.sql | 20 ++-- tests/test_sql_refsols/tpch_q21_postgres.sql | 22 ++--- tests/test_sql_refsols/tpch_q21_snowflake.sql | 22 ++--- tests/test_sql_refsols/tpch_q21_sqlite.sql | 22 ++--- 39 files changed, 252 insertions(+), 241 deletions(-) create mode 100644 pydough/conversion/join_key_substitution.py diff --git a/pydough/conversion/join_key_substitution.py b/pydough/conversion/join_key_substitution.py new file mode 100644 index 000000000..8021cf8c7 --- /dev/null +++ b/pydough/conversion/join_key_substitution.py @@ -0,0 +1,96 @@ +""" +TODO +""" + +from pydough.relational import ( + ColumnReference, + ColumnReferenceFinder, + Join, + JoinCardinality, + JoinType, + RelationalExpression, + RelationalNode, + RelationalShuttle, +) +from pydough.relational.rel_util import ( + apply_substitution, + extract_equijoin_keys, +) + + +class JoinKeySubstitutionShuttle(RelationalShuttle): + def visit_join(self, join: Join) -> RelationalNode: + join_substitution: dict[RelationalExpression, RelationalExpression] = {} + if join.join_type == JoinType.INNER: + lhs_keys_list, rhs_keys_list = extract_equijoin_keys(join) + if len(lhs_keys_list) > 0 and len(rhs_keys_list) > 0: + lhs_keys: set[ColumnReference] = set(lhs_keys_list) + rhs_keys: set[ColumnReference] = set(rhs_keys_list) + col_finder = ColumnReferenceFinder() + for value in join.columns.values(): + value.accept(col_finder) + col_refs: set[ColumnReference] = col_finder.get_column_references() + lhs_refs = { + ref + for ref in col_refs + if ref.input_name == 
join.default_input_aliases[0] + } + rhs_refs = col_refs - lhs_refs + if ( + join.cardinality == JoinCardinality.SINGULAR_ACCESS + and rhs_keys == rhs_refs + ): + for lhs_key, rhs_key in zip(lhs_keys_list, rhs_keys_list): + join_substitution[rhs_key] = lhs_key + elif ( + join.reverse_cardinality == JoinCardinality.SINGULAR_ACCESS + and lhs_keys == rhs_refs + ): + for lhs_key, rhs_key in zip(lhs_keys_list, rhs_keys_list): + join_substitution[lhs_key] = rhs_key + + if len(join_substitution) > 0: + join = Join( + join.inputs, + join.condition, + join.join_type, + { + name: apply_substitution(expr, join_substitution, {}) + for name, expr in join.columns.items() + }, + join.cardinality, + join.reverse_cardinality, + join.correl_name, + ) + + # # Find all column references in the join condition + # col_finder = ColumnReferenceFinder() + # col_finder.visit(join.condition) + # col_refs = col_finder.get_column_references() + + # substitution = {} + # for col_ref in col_refs: + # if add_input_name(col_ref, join.left.schema) in join.left.schema: + # substitution[col_ref] = add_input_name(col_ref, join.left.schema) + # elif add_input_name(col_ref, join.right.schema) in join.right.schema: + # substitution[col_ref] = add_input_name(col_ref, join.right.schema) + + # new_condition = apply_substitution(join.condition, substitution) + + # new_join: Join = Join( + # left=left, + # right=right, + # condition=new_condition, + # join_type=join.join_type, + # schema=join.schema + # ) + + return super().visit_join(join) + + +def join_key_substitution(root: RelationalNode) -> RelationalNode: + """ + TODO + """ + shuttle: JoinKeySubstitutionShuttle = JoinKeySubstitutionShuttle() + return root.accept_shuttle(shuttle) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index d886bb3e9..60eb97fb2 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -86,6 +86,7 @@ from .hybrid_translator 
import HybridTranslator from .hybrid_tree import HybridTree from .join_aggregate_transpose import pull_aggregates_above_joins +from .join_key_substitution import join_key_substitution from .masking_shuttles import MaskLiteralComparisonShuttle from .merge_projects import merge_projects from .projection_pullup import pullup_projections @@ -1608,6 +1609,8 @@ def optimize_relational_tree( root = remove_redundant_aggs(root) root = pruner.prune_unused_columns(root) + root = confirm_root(join_key_substitution(root)) + # Re-run projection merging, without pushing into joins. This will allow # some redundant projections created by pullup to be removed entirely. root = confirm_root(merge_projects(root, push_into_joins=False)) diff --git a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt index 09e213e3f..d05efdf4f 100644 --- a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt +++ b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt @@ -1,9 +1,7 @@ -ROOT(columns=[('engine', search_engine), ('n_users', DEFAULT_TO(ndistinct_user_id, 0:numeric))], orderings=[(search_engine):asc_first]) - JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ndistinct_user_id': t1.ndistinct_user_id, 'search_engine': t0.search_engine}) +ROOT(columns=[('engine', search_engine), ('n_users', DEFAULT_TO(ndistinct_search_user_id, 0:numeric))], orderings=[(search_engine):asc_first]) + JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ndistinct_search_user_id': t1.ndistinct_search_user_id, 'search_engine': t0.search_engine}) AGGREGATE(keys={'search_engine': search_engine}, aggregations={}) SCAN(table=SEARCHES, columns={'search_engine': search_engine}) - AGGREGATE(keys={'search_engine': search_engine}, aggregations={'ndistinct_user_id': 
NDISTINCT(user_id)}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'search_engine': t0.search_engine, 'user_id': t1.user_id}) - FILTER(condition=MONOTONIC(2010:numeric, YEAR(search_ts), 2019:numeric), columns={'search_engine': search_engine, 'search_user_id': search_user_id}) - SCAN(table=SEARCHES, columns={'search_engine': search_engine, 'search_ts': search_ts, 'search_user_id': search_user_id}) - SCAN(table=USERS, columns={'user_id': user_id}) + AGGREGATE(keys={'search_engine': search_engine}, aggregations={'ndistinct_search_user_id': NDISTINCT(search_user_id)}) + FILTER(condition=MONOTONIC(2010:numeric, YEAR(search_ts), 2019:numeric), columns={'search_engine': search_engine, 'search_user_id': search_user_id}) + SCAN(table=SEARCHES, columns={'search_engine': search_engine, 'search_ts': search_ts, 'search_user_id': search_user_id}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 0d8a6953e..e1b29a494 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -5,7 +5,7 @@ ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', DEFAULT_TO(KEEP_IF(count_ SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, columns={'anything_l_suppkey': t0.anything_l_suppkey}) + JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.l_orderkey, type=ANTI, columns={'anything_l_suppkey': t0.anything_l_suppkey}) FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_suppkey': anything_l_suppkey, 
'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) @@ -14,10 +14,8 @@ ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', DEFAULT_TO(KEEP_IF(count_ SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey}) - FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) + JOIN(condition=t1.l_suppkey != 
t0.l_suppkey & t0.l_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) + FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) FILTER(condition=l_receiptdate > l_commitdate, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_sql_refsols/defog_academic_gen1_ansi.sql b/tests/test_sql_refsols/defog_academic_gen1_ansi.sql index 75c2544bc..dcbb778aa 100644 --- a/tests/test_sql_refsols/defog_academic_gen1_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen1_ansi.sql @@ -3,10 +3,8 @@ WITH _t0 AS ( writes.aid, COUNT(DISTINCT domain_publication.did) AS ndistinct_did FROM main.writes AS writes - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name IN ('Data Science', 'Machine Learning') diff --git a/tests/test_sql_refsols/defog_academic_gen1_mysql.sql b/tests/test_sql_refsols/defog_academic_gen1_mysql.sql index 75c2544bc..dcbb778aa 100644 --- a/tests/test_sql_refsols/defog_academic_gen1_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen1_mysql.sql @@ -3,10 +3,8 @@ WITH _t0 AS ( writes.aid, COUNT(DISTINCT domain_publication.did) AS ndistinct_did FROM main.writes AS writes - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication 
AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name IN ('Data Science', 'Machine Learning') diff --git a/tests/test_sql_refsols/defog_academic_gen1_postgres.sql b/tests/test_sql_refsols/defog_academic_gen1_postgres.sql index 75c2544bc..dcbb778aa 100644 --- a/tests/test_sql_refsols/defog_academic_gen1_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen1_postgres.sql @@ -3,10 +3,8 @@ WITH _t0 AS ( writes.aid, COUNT(DISTINCT domain_publication.did) AS ndistinct_did FROM main.writes AS writes - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name IN ('Data Science', 'Machine Learning') diff --git a/tests/test_sql_refsols/defog_academic_gen1_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen1_snowflake.sql index 75c2544bc..dcbb778aa 100644 --- a/tests/test_sql_refsols/defog_academic_gen1_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen1_snowflake.sql @@ -3,10 +3,8 @@ WITH _t0 AS ( writes.aid, COUNT(DISTINCT domain_publication.did) AS ndistinct_did FROM main.writes AS writes - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name IN ('Data Science', 'Machine Learning') diff --git a/tests/test_sql_refsols/defog_academic_gen1_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen1_sqlite.sql index 75c2544bc..dcbb778aa 100644 --- a/tests/test_sql_refsols/defog_academic_gen1_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen1_sqlite.sql 
@@ -3,10 +3,8 @@ WITH _t0 AS ( writes.aid, COUNT(DISTINCT domain_publication.did) AS ndistinct_did FROM main.writes AS writes - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name IN ('Data Science', 'Machine Learning') diff --git a/tests/test_sql_refsols/defog_academic_gen21_ansi.sql b/tests/test_sql_refsols/defog_academic_gen21_ansi.sql index 45760b3bf..e28abbbc1 100644 --- a/tests/test_sql_refsols/defog_academic_gen21_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen21_ansi.sql @@ -6,9 +6,7 @@ JOIN main.author AS author ON author.oid = organization.oid JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' diff --git a/tests/test_sql_refsols/defog_academic_gen21_mysql.sql b/tests/test_sql_refsols/defog_academic_gen21_mysql.sql index 4a0901199..e0c5c9c96 100644 --- a/tests/test_sql_refsols/defog_academic_gen21_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen21_mysql.sql @@ -4,10 +4,8 @@ WITH _u_0 AS ( FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' GROUP BY diff --git a/tests/test_sql_refsols/defog_academic_gen21_postgres.sql 
b/tests/test_sql_refsols/defog_academic_gen21_postgres.sql index 4a0901199..e0c5c9c96 100644 --- a/tests/test_sql_refsols/defog_academic_gen21_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen21_postgres.sql @@ -4,10 +4,8 @@ WITH _u_0 AS ( FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' GROUP BY diff --git a/tests/test_sql_refsols/defog_academic_gen21_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen21_snowflake.sql index 4a0901199..e0c5c9c96 100644 --- a/tests/test_sql_refsols/defog_academic_gen21_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen21_snowflake.sql @@ -4,10 +4,8 @@ WITH _u_0 AS ( FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' GROUP BY diff --git a/tests/test_sql_refsols/defog_academic_gen21_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen21_sqlite.sql index 4a0901199..e0c5c9c96 100644 --- a/tests/test_sql_refsols/defog_academic_gen21_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen21_sqlite.sql @@ -4,10 +4,8 @@ WITH _u_0 AS ( FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN 
main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' GROUP BY diff --git a/tests/test_sql_refsols/defog_academic_gen22_ansi.sql b/tests/test_sql_refsols/defog_academic_gen22_ansi.sql index 95538288a..ca70ec1eb 100644 --- a/tests/test_sql_refsols/defog_academic_gen22_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen22_ansi.sql @@ -10,9 +10,7 @@ SELECT FROM main.author AS author JOIN _s0 AS _s0 ON _s0.aid = author.aid -JOIN main.domain AS domain - ON _s0.did = domain.did -JOIN _s0 AS _s3 - ON _s3.did = domain.did +JOIN _s0 AS _s1 + ON _s0.did = _s1.did JOIN main.author AS author_2 - ON LOWER(author_2.name) LIKE '%martin%' AND _s3.aid = author_2.aid + ON LOWER(author_2.name) LIKE '%martin%' AND _s1.aid = author_2.aid diff --git a/tests/test_sql_refsols/defog_academic_gen22_mysql.sql b/tests/test_sql_refsols/defog_academic_gen22_mysql.sql index c60c3d92b..53ec2291e 100644 --- a/tests/test_sql_refsols/defog_academic_gen22_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen22_mysql.sql @@ -7,12 +7,10 @@ WITH _s0 AS ( SELECT _s0.aid AS _u_1 FROM _s0 AS _s0 - JOIN main.domain AS domain - ON _s0.did = domain.did - JOIN _s0 AS _s3 - ON _s3.did = domain.did + JOIN _s0 AS _s1 + ON _s0.did = _s1.did JOIN main.author AS author - ON LOWER(author.name) LIKE '%martin%' AND _s3.aid = author.aid + ON LOWER(author.name) LIKE '%martin%' AND _s1.aid = author.aid GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_academic_gen22_postgres.sql b/tests/test_sql_refsols/defog_academic_gen22_postgres.sql index c60c3d92b..53ec2291e 100644 --- a/tests/test_sql_refsols/defog_academic_gen22_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen22_postgres.sql @@ -7,12 +7,10 @@ WITH _s0 AS ( SELECT _s0.aid AS _u_1 FROM _s0 AS _s0 - JOIN main.domain AS domain - ON _s0.did = domain.did - JOIN _s0 AS _s3 - ON _s3.did = domain.did + JOIN _s0 AS _s1 + ON _s0.did = _s1.did JOIN main.author AS author - ON LOWER(author.name) LIKE 
'%martin%' AND _s3.aid = author.aid + ON LOWER(author.name) LIKE '%martin%' AND _s1.aid = author.aid GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_academic_gen22_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen22_snowflake.sql index 8cefdff63..071aad418 100644 --- a/tests/test_sql_refsols/defog_academic_gen22_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen22_snowflake.sql @@ -7,12 +7,10 @@ WITH _s0 AS ( SELECT _s0.aid AS _u_1 FROM _s0 AS _s0 - JOIN main.domain AS domain - ON _s0.did = domain.did - JOIN _s0 AS _s3 - ON _s3.did = domain.did + JOIN _s0 AS _s1 + ON _s0.did = _s1.did JOIN main.author AS author - ON CONTAINS(LOWER(author.name), 'martin') AND _s3.aid = author.aid + ON CONTAINS(LOWER(author.name), 'martin') AND _s1.aid = author.aid GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_academic_gen22_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen22_sqlite.sql index c60c3d92b..53ec2291e 100644 --- a/tests/test_sql_refsols/defog_academic_gen22_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen22_sqlite.sql @@ -7,12 +7,10 @@ WITH _s0 AS ( SELECT _s0.aid AS _u_1 FROM _s0 AS _s0 - JOIN main.domain AS domain - ON _s0.did = domain.did - JOIN _s0 AS _s3 - ON _s3.did = domain.did + JOIN _s0 AS _s1 + ON _s0.did = _s1.did JOIN main.author AS author - ON LOWER(author.name) LIKE '%martin%' AND _s3.aid = author.aid + ON LOWER(author.name) LIKE '%martin%' AND _s1.aid = author.aid GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_academic_gen24_ansi.sql b/tests/test_sql_refsols/defog_academic_gen24_ansi.sql index bf25d3345..709d36693 100644 --- a/tests/test_sql_refsols/defog_academic_gen24_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen24_ansi.sql @@ -1,12 +1,10 @@ -WITH _s9 AS ( +WITH _s7 AS ( SELECT domain_conference.cid, writes.pid FROM main.writes AS writes - JOIN main.author AS author - ON author.aid = writes.aid JOIN main.domain_author AS domain_author - ON author.aid = domain_author.aid + ON 
domain_author.aid = writes.aid JOIN main.domain AS domain ON LOWER(domain.name) LIKE '%sociology%' AND domain.did = domain_author.did JOIN main.domain_conference AS domain_conference @@ -15,7 +13,7 @@ WITH _s9 AS ( SELECT publication.title FROM main.publication AS publication -JOIN _s9 AS _s9 - ON _s9.cid = publication.cid AND _s9.pid = publication.pid +JOIN _s7 AS _s7 + ON _s7.cid = publication.cid AND _s7.pid = publication.pid WHERE publication.year = 2020 diff --git a/tests/test_sql_refsols/defog_academic_gen24_mysql.sql b/tests/test_sql_refsols/defog_academic_gen24_mysql.sql index ebc20c922..8b9e27d18 100644 --- a/tests/test_sql_refsols/defog_academic_gen24_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen24_mysql.sql @@ -3,10 +3,8 @@ WITH _u_0 AS ( domain_conference.cid AS _u_1, writes.pid AS _u_2 FROM main.writes AS writes - JOIN main.author AS author - ON author.aid = writes.aid JOIN main.domain_author AS domain_author - ON author.aid = domain_author.aid + ON domain_author.aid = writes.aid JOIN main.domain AS domain ON LOWER(domain.name) LIKE '%sociology%' AND domain.did = domain_author.did JOIN main.domain_conference AS domain_conference diff --git a/tests/test_sql_refsols/defog_academic_gen24_postgres.sql b/tests/test_sql_refsols/defog_academic_gen24_postgres.sql index ebc20c922..8b9e27d18 100644 --- a/tests/test_sql_refsols/defog_academic_gen24_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen24_postgres.sql @@ -3,10 +3,8 @@ WITH _u_0 AS ( domain_conference.cid AS _u_1, writes.pid AS _u_2 FROM main.writes AS writes - JOIN main.author AS author - ON author.aid = writes.aid JOIN main.domain_author AS domain_author - ON author.aid = domain_author.aid + ON domain_author.aid = writes.aid JOIN main.domain AS domain ON LOWER(domain.name) LIKE '%sociology%' AND domain.did = domain_author.did JOIN main.domain_conference AS domain_conference diff --git a/tests/test_sql_refsols/defog_academic_gen24_snowflake.sql 
b/tests/test_sql_refsols/defog_academic_gen24_snowflake.sql index e9c898c8f..1a191d3e8 100644 --- a/tests/test_sql_refsols/defog_academic_gen24_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen24_snowflake.sql @@ -3,10 +3,8 @@ WITH _u_0 AS ( domain_conference.cid AS _u_1, writes.pid AS _u_2 FROM main.writes AS writes - JOIN main.author AS author - ON author.aid = writes.aid JOIN main.domain_author AS domain_author - ON author.aid = domain_author.aid + ON domain_author.aid = writes.aid JOIN main.domain AS domain ON CONTAINS(LOWER(domain.name), 'sociology') AND domain.did = domain_author.did JOIN main.domain_conference AS domain_conference diff --git a/tests/test_sql_refsols/defog_academic_gen24_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen24_sqlite.sql index ebc20c922..8b9e27d18 100644 --- a/tests/test_sql_refsols/defog_academic_gen24_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen24_sqlite.sql @@ -3,10 +3,8 @@ WITH _u_0 AS ( domain_conference.cid AS _u_1, writes.pid AS _u_2 FROM main.writes AS writes - JOIN main.author AS author - ON author.aid = writes.aid JOIN main.domain_author AS domain_author - ON author.aid = domain_author.aid + ON domain_author.aid = writes.aid JOIN main.domain AS domain ON LOWER(domain.name) LIKE '%sociology%' AND domain.did = domain_author.did JOIN main.domain_conference AS domain_conference diff --git a/tests/test_sql_refsols/defog_academic_gen25_ansi.sql b/tests/test_sql_refsols/defog_academic_gen25_ansi.sql index 96d7e1db7..41c74bcd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen25_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen25_ansi.sql @@ -3,9 +3,7 @@ SELECT DISTINCT FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON 
domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/defog_academic_gen25_mysql.sql b/tests/test_sql_refsols/defog_academic_gen25_mysql.sql index 96d7e1db7..41c74bcd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen25_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen25_mysql.sql @@ -3,9 +3,7 @@ SELECT DISTINCT FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/defog_academic_gen25_postgres.sql b/tests/test_sql_refsols/defog_academic_gen25_postgres.sql index 96d7e1db7..41c74bcd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen25_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen25_postgres.sql @@ -3,9 +3,7 @@ SELECT DISTINCT FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/defog_academic_gen25_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen25_snowflake.sql index 96d7e1db7..41c74bcd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen25_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen25_snowflake.sql @@ -3,9 +3,7 @@ SELECT DISTINCT FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication 
AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/defog_academic_gen25_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen25_sqlite.sql index 96d7e1db7..41c74bcd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen25_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen25_sqlite.sql @@ -3,9 +3,7 @@ SELECT DISTINCT FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_ansi.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_ansi.sql index 1149447fd..91fa105d2 100644 --- a/tests/test_sql_refsols/epoch_unique_users_per_engine_ansi.sql +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_ansi.sql @@ -1,25 +1,23 @@ -WITH _s2 AS ( +WITH _s0 AS ( SELECT DISTINCT search_engine FROM searches -), _s3 AS ( +), _s1 AS ( SELECT - searches.search_engine, - COUNT(DISTINCT users.user_id) AS ndistinct_user_id - FROM searches AS searches - JOIN users AS users - ON searches.search_user_id = users.user_id + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM searches WHERE - EXTRACT(YEAR FROM CAST(searches.search_ts AS DATETIME)) <= 2019 - AND EXTRACT(YEAR FROM CAST(searches.search_ts AS DATETIME)) >= 2010 + EXTRACT(YEAR FROM CAST(search_ts AS DATETIME)) <= 2019 + AND EXTRACT(YEAR FROM CAST(search_ts AS DATETIME)) >= 2010 GROUP BY 1 ) SELECT - _s2.search_engine AS engine, - COALESCE(_s3.ndistinct_user_id, 0) AS n_users -FROM _s2 AS _s2 
-LEFT JOIN _s3 AS _s3 - ON _s2.search_engine = _s3.search_engine + _s0.search_engine AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine ORDER BY 1 diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_mysql.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_mysql.sql index ba9b2af64..20ffd6916 100644 --- a/tests/test_sql_refsols/epoch_unique_users_per_engine_mysql.sql +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_mysql.sql @@ -1,25 +1,23 @@ -WITH _s2 AS ( +WITH _s0 AS ( SELECT DISTINCT search_engine FROM SEARCHES -), _s3 AS ( +), _s1 AS ( SELECT - SEARCHES.search_engine, - COUNT(DISTINCT USERS.user_id) AS ndistinct_user_id - FROM SEARCHES AS SEARCHES - JOIN USERS AS USERS - ON SEARCHES.search_user_id = USERS.user_id + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM SEARCHES WHERE - EXTRACT(YEAR FROM CAST(SEARCHES.search_ts AS DATETIME)) <= 2019 - AND EXTRACT(YEAR FROM CAST(SEARCHES.search_ts AS DATETIME)) >= 2010 + EXTRACT(YEAR FROM CAST(search_ts AS DATETIME)) <= 2019 + AND EXTRACT(YEAR FROM CAST(search_ts AS DATETIME)) >= 2010 GROUP BY 1 ) SELECT - _s2.search_engine COLLATE utf8mb4_bin AS engine, - COALESCE(_s3.ndistinct_user_id, 0) AS n_users -FROM _s2 AS _s2 -LEFT JOIN _s3 AS _s3 - ON _s2.search_engine = _s3.search_engine + _s0.search_engine COLLATE utf8mb4_bin AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine ORDER BY 1 diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_postgres.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_postgres.sql index 7d12c81e0..2cd5d513e 100644 --- a/tests/test_sql_refsols/epoch_unique_users_per_engine_postgres.sql +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_postgres.sql @@ -1,25 +1,23 @@ -WITH _s2 AS ( +WITH _s0 AS ( SELECT DISTINCT 
search_engine FROM searches -), _s3 AS ( +), _s1 AS ( SELECT - searches.search_engine, - COUNT(DISTINCT users.user_id) AS ndistinct_user_id - FROM searches AS searches - JOIN users AS users - ON searches.search_user_id = users.user_id + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM searches WHERE - EXTRACT(YEAR FROM CAST(searches.search_ts AS TIMESTAMP)) <= 2019 - AND EXTRACT(YEAR FROM CAST(searches.search_ts AS TIMESTAMP)) >= 2010 + EXTRACT(YEAR FROM CAST(search_ts AS TIMESTAMP)) <= 2019 + AND EXTRACT(YEAR FROM CAST(search_ts AS TIMESTAMP)) >= 2010 GROUP BY 1 ) SELECT - _s2.search_engine AS engine, - COALESCE(_s3.ndistinct_user_id, 0) AS n_users -FROM _s2 AS _s2 -LEFT JOIN _s3 AS _s3 - ON _s2.search_engine = _s3.search_engine + _s0.search_engine AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql index 971e806fc..b9bcb8e5b 100644 --- a/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql @@ -1,25 +1,23 @@ -WITH _s2 AS ( +WITH _s0 AS ( SELECT DISTINCT search_engine FROM searches -), _s3 AS ( +), _s1 AS ( SELECT - searches.search_engine, - COUNT(DISTINCT users.user_id) AS ndistinct_user_id - FROM searches AS searches - JOIN users AS users - ON searches.search_user_id = users.user_id + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM searches WHERE - YEAR(CAST(searches.search_ts AS TIMESTAMP)) <= 2019 - AND YEAR(CAST(searches.search_ts AS TIMESTAMP)) >= 2010 + YEAR(CAST(search_ts AS TIMESTAMP)) <= 2019 + AND YEAR(CAST(search_ts AS TIMESTAMP)) >= 2010 GROUP BY 1 ) SELECT - _s2.search_engine AS engine, - COALESCE(_s3.ndistinct_user_id, 0) AS n_users 
-FROM _s2 AS _s2 -LEFT JOIN _s3 AS _s3 - ON _s2.search_engine = _s3.search_engine + _s0.search_engine AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_sqlite.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_sqlite.sql index 0c6965b9f..44bc84f50 100644 --- a/tests/test_sql_refsols/epoch_unique_users_per_engine_sqlite.sql +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_sqlite.sql @@ -1,25 +1,23 @@ -WITH _s2 AS ( +WITH _s0 AS ( SELECT DISTINCT search_engine FROM searches -), _s3 AS ( +), _s1 AS ( SELECT - searches.search_engine, - COUNT(DISTINCT users.user_id) AS ndistinct_user_id - FROM searches AS searches - JOIN users AS users - ON searches.search_user_id = users.user_id + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM searches WHERE - CAST(STRFTIME('%Y', searches.search_ts) AS INTEGER) <= 2019 - AND CAST(STRFTIME('%Y', searches.search_ts) AS INTEGER) >= 2010 + CAST(STRFTIME('%Y', search_ts) AS INTEGER) <= 2019 + AND CAST(STRFTIME('%Y', search_ts) AS INTEGER) >= 2010 GROUP BY 1 ) SELECT - _s2.search_engine AS engine, - COALESCE(_s3.ndistinct_user_id, 0) AS n_users -FROM _s2 AS _s2 -LEFT JOIN _s3 AS _s3 - ON _s2.search_engine = _s3.search_engine + _s0.search_engine AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine ORDER BY 1 diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index a53a1e9b5..c95b9b937 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -25,36 +25,28 @@ WITH _t5 AS ( 2, 3 ), _s11 AS ( - SELECT - _t6.l_linenumber, - _t6.l_orderkey, - orders.o_orderkey - FROM _t5 AS _t6 - JOIN tpch.orders AS orders - ON _t6.l_orderkey = 
orders.o_orderkey - JOIN tpch.lineitem AS lineitem - ON _t6.l_suppkey <> lineitem.l_suppkey - AND lineitem.l_commitdate < lineitem.l_receiptdate - AND lineitem.l_orderkey = orders.o_orderkey -), _s13 AS ( SELECT _t3.anything_l_suppkey FROM _t3 AS _t3 - JOIN _s11 AS _s11 - ON _s11.l_linenumber = _t3.l_linenumber - AND _s11.l_orderkey = _t3.l_orderkey - AND _s11.o_orderkey = _t3.o_orderkey + JOIN _t5 AS _t6 + ON _t3.l_linenumber = _t6.l_linenumber + AND _t3.l_orderkey = _t6.l_orderkey + AND _t3.o_orderkey = _t6.l_orderkey + JOIN tpch.lineitem AS lineitem + ON _t6.l_orderkey = lineitem.l_orderkey + AND _t6.l_suppkey <> lineitem.l_suppkey + AND lineitem.l_commitdate < lineitem.l_receiptdate WHERE _t3.anything_o_orderstatus = 'F' ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COUNT(_s11.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey +LEFT JOIN _s11 AS _s11 + ON _s11.anything_l_suppkey = supplier.s_suppkey GROUP BY supplier.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/tpch_q21_mysql.sql b/tests/test_sql_refsols/tpch_q21_mysql.sql index 302c196ca..7a70299a0 100644 --- a/tests/test_sql_refsols/tpch_q21_mysql.sql +++ b/tests/test_sql_refsols/tpch_q21_mysql.sql @@ -27,38 +27,34 @@ WITH _t5 AS ( ), _u_0 AS ( SELECT _t6.l_linenumber AS _u_1, - _t6.l_orderkey AS _u_2, - ORDERS.o_orderkey AS _u_3 + _t6.l_orderkey AS _u_2 FROM _t5 AS _t6 - JOIN tpch.ORDERS AS ORDERS - ON ORDERS.o_orderkey = _t6.l_orderkey JOIN tpch.LINEITEM AS LINEITEM ON LINEITEM.l_commitdate < LINEITEM.l_receiptdate - AND LINEITEM.l_orderkey = ORDERS.o_orderkey + AND LINEITEM.l_orderkey = _t6.l_orderkey AND LINEITEM.l_suppkey <> _t6.l_suppkey GROUP BY 1, - 2, - 3 -), _s13 AS ( + 2 +), _s11 AS ( SELECT _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 
ON _t3.l_linenumber = _u_0._u_1 AND _t3.l_orderkey = _u_0._u_2 - AND _t3.o_orderkey = _u_0._u_3 + AND _t3.o_orderkey = _u_0._u_2 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL ) SELECT ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COUNT(_s11.anything_l_suppkey) AS NUMWAIT FROM tpch.SUPPLIER AS SUPPLIER JOIN tpch.NATION AS NATION ON NATION.n_name = 'SAUDI ARABIA' AND NATION.n_nationkey = SUPPLIER.s_nationkey -LEFT JOIN _s13 AS _s13 - ON SUPPLIER.s_suppkey = _s13.anything_l_suppkey +LEFT JOIN _s11 AS _s11 + ON SUPPLIER.s_suppkey = _s11.anything_l_suppkey GROUP BY SUPPLIER.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/tpch_q21_postgres.sql b/tests/test_sql_refsols/tpch_q21_postgres.sql index 79c4527cd..4231c83c5 100644 --- a/tests/test_sql_refsols/tpch_q21_postgres.sql +++ b/tests/test_sql_refsols/tpch_q21_postgres.sql @@ -27,38 +27,34 @@ WITH _t5 AS ( ), _u_0 AS ( SELECT _t6.l_linenumber AS _u_1, - _t6.l_orderkey AS _u_2, - orders.o_orderkey AS _u_3 + _t6.l_orderkey AS _u_2 FROM _t5 AS _t6 - JOIN tpch.orders AS orders - ON _t6.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t6.l_suppkey <> lineitem.l_suppkey + ON _t6.l_orderkey = lineitem.l_orderkey + AND _t6.l_suppkey <> lineitem.l_suppkey AND lineitem.l_commitdate < lineitem.l_receiptdate - AND lineitem.l_orderkey = orders.o_orderkey GROUP BY 1, - 2, - 3 -), _s13 AS ( + 2 +), _s11 AS ( SELECT _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 AND _t3.l_orderkey = _u_0._u_2 - AND _t3.o_orderkey = _u_0._u_3 + AND _t3.o_orderkey = _u_0._u_2 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL ) SELECT MAX(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COUNT(_s11.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN 
_s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey +LEFT JOIN _s11 AS _s11 + ON _s11.anything_l_suppkey = supplier.s_suppkey GROUP BY supplier.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/tpch_q21_snowflake.sql b/tests/test_sql_refsols/tpch_q21_snowflake.sql index aa6a0445b..9e97a650e 100644 --- a/tests/test_sql_refsols/tpch_q21_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q21_snowflake.sql @@ -27,38 +27,34 @@ WITH _t5 AS ( ), _u_0 AS ( SELECT _t6.l_linenumber AS _u_1, - _t6.l_orderkey AS _u_2, - orders.o_orderkey AS _u_3 + _t6.l_orderkey AS _u_2 FROM _t5 AS _t6 - JOIN tpch.orders AS orders - ON _t6.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t6.l_suppkey <> lineitem.l_suppkey + ON _t6.l_orderkey = lineitem.l_orderkey + AND _t6.l_suppkey <> lineitem.l_suppkey AND lineitem.l_commitdate < lineitem.l_receiptdate - AND lineitem.l_orderkey = orders.o_orderkey GROUP BY 1, - 2, - 3 -), _s13 AS ( + 2 +), _s11 AS ( SELECT _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 AND _t3.l_orderkey = _u_0._u_2 - AND _t3.o_orderkey = _u_0._u_3 + AND _t3.o_orderkey = _u_0._u_2 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COUNT(_s11.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey +LEFT JOIN _s11 AS _s11 + ON _s11.anything_l_suppkey = supplier.s_suppkey GROUP BY supplier.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/tpch_q21_sqlite.sql b/tests/test_sql_refsols/tpch_q21_sqlite.sql index c5ceb7d67..6d8bb96d5 100644 --- a/tests/test_sql_refsols/tpch_q21_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q21_sqlite.sql @@ -27,38 +27,34 @@ WITH _t5 AS ( ), _u_0 AS ( SELECT _t6.l_linenumber AS _u_1, 
- _t6.l_orderkey AS _u_2, - orders.o_orderkey AS _u_3 + _t6.l_orderkey AS _u_2 FROM _t5 AS _t6 - JOIN tpch.orders AS orders - ON _t6.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t6.l_suppkey <> lineitem.l_suppkey + ON _t6.l_orderkey = lineitem.l_orderkey + AND _t6.l_suppkey <> lineitem.l_suppkey AND lineitem.l_commitdate < lineitem.l_receiptdate - AND lineitem.l_orderkey = orders.o_orderkey GROUP BY 1, - 2, - 3 -), _s13 AS ( + 2 +), _s11 AS ( SELECT _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 AND _t3.l_orderkey = _u_0._u_2 - AND _t3.o_orderkey = _u_0._u_3 + AND _t3.o_orderkey = _u_0._u_2 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL ) SELECT MAX(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COUNT(_s11.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey +LEFT JOIN _s11 AS _s11 + ON _s11.anything_l_suppkey = supplier.s_suppkey GROUP BY supplier.s_suppkey ORDER BY From fd026b2822c9b4ff03046268fcd768f22a01ca7c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 30 Oct 2025 13:19:07 -0400 Subject: [PATCH 127/143] Moving around usage location of join key substitution --- pydough/conversion/relational_converter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 60eb97fb2..76e88e005 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1594,7 +1594,8 @@ def optimize_relational_tree( # D: join-aggregate transpose # E: projection pullup again # F: redundant aggregation removal - # G: column pruning + # G: join key substitution + # H: column pruning # This is done because pullup will create more 
opportunities for expression # simplification, which will allow more filters to be pushed further down, # and the combination of those together will create more opportunities for @@ -1607,10 +1608,9 @@ def optimize_relational_tree( root = confirm_root(pull_aggregates_above_joins(root)) root = confirm_root(pullup_projections(root)) root = remove_redundant_aggs(root) + root = confirm_root(join_key_substitution(root)) root = pruner.prune_unused_columns(root) - root = confirm_root(join_key_substitution(root)) - # Re-run projection merging, without pushing into joins. This will allow # some redundant projections created by pullup to be removed entirely. root = confirm_root(merge_projects(root, push_into_joins=False)) From d91d799552ca8e033fe2be3f2ecb888d6a54283e Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 31 Oct 2025 05:57:22 -0400 Subject: [PATCH 128/143] Adding subset enhancement, and more tests of edge cases --- pydough/conversion/join_key_substitution.py | 45 +++++++------- tests/test_metadata/sample_graphs.json | 2 +- .../snowflake_sample_graphs.json | 2 +- tests/test_pipeline_tpch_custom.py | 59 +++++++++++++++++++ .../count_cust_supplier_nation_combos.txt | 29 ++++----- .../richest_customer_key_per_region.txt | 5 ++ .../test_plan_refsols/top_lineitems_info.txt | 11 ++++ .../top_lineitems_info_1.txt | 11 ++++ .../top_lineitems_info_2.txt | 12 ++++ .../defog_dealership_adv6_ansi.sql | 4 +- .../defog_dealership_adv6_mysql.sql | 4 +- .../defog_dealership_adv6_postgres.sql | 4 +- .../defog_dealership_adv6_snowflake.sql | 4 +- .../defog_dealership_adv6_sqlite.sql | 4 +- 14 files changed, 144 insertions(+), 52 deletions(-) create mode 100644 tests/test_plan_refsols/richest_customer_key_per_region.txt create mode 100644 tests/test_plan_refsols/top_lineitems_info.txt create mode 100644 tests/test_plan_refsols/top_lineitems_info_1.txt create mode 100644 tests/test_plan_refsols/top_lineitems_info_2.txt diff --git a/pydough/conversion/join_key_substitution.py 
b/pydough/conversion/join_key_substitution.py index 8021cf8c7..55f839019 100644 --- a/pydough/conversion/join_key_substitution.py +++ b/pydough/conversion/join_key_substitution.py @@ -19,11 +19,20 @@ class JoinKeySubstitutionShuttle(RelationalShuttle): + """ + TODO + """ + def visit_join(self, join: Join) -> RelationalNode: + # Build up a mapping of join key substitutions mapping input columns + # from one input to another when the optimization case is detected: + # requires an inner join with equi-join keys. join_substitution: dict[RelationalExpression, RelationalExpression] = {} if join.join_type == JoinType.INNER: lhs_keys_list, rhs_keys_list = extract_equijoin_keys(join) if len(lhs_keys_list) > 0 and len(rhs_keys_list) > 0: + # Identify which columns are used by the join columns that come + # from the left and right inputs. lhs_keys: set[ColumnReference] = set(lhs_keys_list) rhs_keys: set[ColumnReference] = set(rhs_keys_list) col_finder = ColumnReferenceFinder() @@ -36,19 +45,28 @@ def visit_join(self, join: Join) -> RelationalNode: if ref.input_name == join.default_input_aliases[0] } rhs_refs = col_refs - lhs_refs + # If the left side is singular access, and all the columns used + # from the right side are just the join keys, then we can + # substitute the right join keys with the left join keys. if ( join.cardinality == JoinCardinality.SINGULAR_ACCESS - and rhs_keys == rhs_refs + and rhs_refs <= rhs_keys ): for lhs_key, rhs_key in zip(lhs_keys_list, rhs_keys_list): join_substitution[rhs_key] = lhs_key + + # If the right side is singular access, and all the columns used + # from the left side are just the join keys, then we can + # substitute the left join keys with the right join keys. 
elif ( join.reverse_cardinality == JoinCardinality.SINGULAR_ACCESS - and lhs_keys == rhs_refs + and rhs_refs <= lhs_keys ): for lhs_key, rhs_key in zip(lhs_keys_list, rhs_keys_list): join_substitution[lhs_key] = rhs_key + # If any substitutions were identified, create a new Join node + # with the substitutions applied to its columns. if len(join_substitution) > 0: join = Join( join.inputs, @@ -63,28 +81,7 @@ def visit_join(self, join: Join) -> RelationalNode: join.correl_name, ) - # # Find all column references in the join condition - # col_finder = ColumnReferenceFinder() - # col_finder.visit(join.condition) - # col_refs = col_finder.get_column_references() - - # substitution = {} - # for col_ref in col_refs: - # if add_input_name(col_ref, join.left.schema) in join.left.schema: - # substitution[col_ref] = add_input_name(col_ref, join.left.schema) - # elif add_input_name(col_ref, join.right.schema) in join.right.schema: - # substitution[col_ref] = add_input_name(col_ref, join.right.schema) - - # new_condition = apply_substitution(join.condition, substitution) - - # new_join: Join = Join( - # left=left, - # right=right, - # condition=new_condition, - # join_type=join.join_type, - # schema=join.schema - # ) - + # Recursively visit the inputs to the join to transform them as well. 
return super().visit_join(join) diff --git a/tests/test_metadata/sample_graphs.json b/tests/test_metadata/sample_graphs.json index 202e38d52..3b0757973 100644 --- a/tests/test_metadata/sample_graphs.json +++ b/tests/test_metadata/sample_graphs.json @@ -673,7 +673,7 @@ "parent collection": "parts", "child collection": "supply_records", "singular": false, - "always matches": false, + "always matches": true, "keys": {"key": ["part_key"]}, "description": "The records indicating which companies supply the part", "synonyms": ["producers", "vendors", "suppliers of part"] diff --git a/tests/test_metadata/snowflake_sample_graphs.json b/tests/test_metadata/snowflake_sample_graphs.json index d3b7c665e..dc510d1e7 100644 --- a/tests/test_metadata/snowflake_sample_graphs.json +++ b/tests/test_metadata/snowflake_sample_graphs.json @@ -673,7 +673,7 @@ "parent collection": "parts", "child collection": "supply_records", "singular": false, - "always matches": false, + "always matches": true, "keys": {"key": ["part_key"]}, "description": "The records indicating which companies supply the part", "synonyms": ["producers", "vendors", "suppliers of part"] diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 43dea2ff0..fd035c64b 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -1449,6 +1449,65 @@ ), id="supplier_pct_national_qty", ), + pytest.param( + PyDoughPandasTest( + "result = (" + " regions" + " .nations" + " .customers" + " .BEST(by=account_balance.DESC(), per='regions')" + " .CALCULATE(key)" + ")", + "TPCH", + lambda: pd.DataFrame({"key": [2487, 61453, 76011, 81976, 144232]}), + "richest_customer_key_per_region", + ), + id="richest_customer_key_per_region", + ), + pytest.param( + PyDoughPandasTest( + "result = (" + " lines" + " .TOP_K(7, by=(order_key.ASC(), line_number.ASC()))" + " .CALCULATE(order_key, line_number, part_size=part_and_supplier.part.size, 
supplier_nation=part_and_supplier.supplier.nation.key)" + ")", + "TPCH", + lambda: pd.DataFrame( + { + "order_key": [1, 1, 1, 1, 1, 1, 2], + "line_number": [1, 2, 3, 4, 5, 6, 1], + "part_size": [9, 47, 16, 20, 44, 46, 19], + "supplier_nation": [23, 13, 5, 24, 20, 8, 0], + } + ), + "top_lineitems_info_1", + ), + id="top_lineitems_info_1", + ), + pytest.param( + PyDoughPandasTest( + "result = (" + " parts" + " .CALCULATE(part_size=size, selected_part_key=key)" + " .supply_records.CALCULATE(selected_supplier_key=supplier_key)" + " .CROSS(nations.CALCULATE(supplier_nation=key).suppliers.supply_records.lines)" + " .WHERE((part_key == selected_part_key) & (supplier_key == selected_supplier_key))" + " .TOP_K(7, by=(order_key.ASC(), line_number.ASC()))" + " .CALCULATE(order_key, line_number, part_size, supplier_nation)" + ")", + "TPCH", + lambda: pd.DataFrame( + { + "order_key": [1, 1, 1, 1, 1, 1, 2], + "line_number": [1, 2, 3, 4, 5, 6, 1], + "part_size": [9, 47, 16, 20, 44, 46, 19], + "supplier_nation": [23, 13, 5, 24, 20, 8, 0], + } + ), + "top_lineitems_info_2", + ), + id="top_lineitems_info_2", + ), pytest.param( PyDoughPandasTest( window_filter_order_1, diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index bbdb92976..0813f6ed2 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -1,18 +1,15 @@ -ROOT(columns=[('year', year_o_orderdate), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', sum_sum_sum_n_rows), ('total_value', DEFAULT_TO(sum_sum_sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year_o_orderdate': year_o_orderdate}, aggregations={'sum_sum_sum_n_rows': SUM(sum_sum_n_rows), 'sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_l_extendedprice)}) - 
JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year_o_orderdate': t0.year_o_orderdate}) - AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year_o_orderdate': year_o_orderdate}, aggregations={'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year_o_orderdate': t0.year_o_orderdate}) - AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year_o_orderdate': year_o_orderdate}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_rows': t0.n_rows, 'ps_suppkey': t1.ps_suppkey, 'sum_l_extendedprice': t0.sum_l_extendedprice, 'year_o_orderdate': t0.year_o_orderdate}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT(), 'sum_l_extendedprice': SUM(l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': 
t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) +ROOT(columns=[('year', year_o_orderdate), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', sum_sum_n_rows), ('total_value', DEFAULT_TO(sum_sum_sum_l_extendedprice, 0:numeric))], orderings=[]) + AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year_o_orderdate': year_o_orderdate}, aggregations={'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year_o_orderdate': t0.year_o_orderdate}) + AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year_o_orderdate': year_o_orderdate}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_rows': t0.n_rows, 's_nationkey': t1.s_nationkey, 'sum_l_extendedprice': t0.sum_l_extendedprice, 
'year_o_orderdate': t0.year_o_orderdate}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT(), 'sum_l_extendedprice': SUM(l_extendedprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/richest_customer_key_per_region.txt b/tests/test_plan_refsols/richest_customer_key_per_region.txt new file mode 100644 index 000000000..67636a941 --- /dev/null +++ b/tests/test_plan_refsols/richest_customer_key_per_region.txt @@ -0,0 +1,5 @@ +ROOT(columns=[('key', c_custkey)], orderings=[]) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, 
type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/top_lineitems_info.txt b/tests/test_plan_refsols/top_lineitems_info.txt new file mode 100644 index 000000000..4f4492cab --- /dev/null +++ b/tests/test_plan_refsols/top_lineitems_info.txt @@ -0,0 +1,11 @@ +ROOT(columns=[('order_key', l_orderkey), ('line_number', l_linenumber), ('part_size', p_size), ('supplier_nation', s_nationkey)], orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first]) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_size': t0.p_size, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_size': t1.p_size}) + LIMIT(limit=5:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first]) + SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_size': t1.p_size, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, 
columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/top_lineitems_info_1.txt b/tests/test_plan_refsols/top_lineitems_info_1.txt new file mode 100644 index 000000000..8ea7fb213 --- /dev/null +++ b/tests/test_plan_refsols/top_lineitems_info_1.txt @@ -0,0 +1,11 @@ +ROOT(columns=[('order_key', l_orderkey), ('line_number', l_linenumber), ('part_size', p_size), ('supplier_nation', s_nationkey)], orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first]) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_size': t0.p_size, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_size': t1.p_size}) + LIMIT(limit=7:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first]) + SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=PLURAL_ACCESS, columns={'p_size': t1.p_size, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/top_lineitems_info_2.txt b/tests/test_plan_refsols/top_lineitems_info_2.txt new file mode 100644 index 000000000..aff71c57c --- /dev/null +++ b/tests/test_plan_refsols/top_lineitems_info_2.txt @@ -0,0 +1,12 @@ +ROOT(columns=[('order_key', l_orderkey), ('line_number', l_linenumber), ('part_size', p_size), ('supplier_nation', n_nationkey)], orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first], limit=7:numeric) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.supplier_key_11 == t1.l_suppkey & t1.l_partkey == t0.p_partkey & t1.l_suppkey == t0.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'n_nationkey': t0.n_nationkey, 'p_size': t0.p_size}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'p_partkey': t0.p_partkey, 'p_size': t0.p_size, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'supplier_key_11': t1.ps_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'p_partkey': t0.p_partkey, 
'p_size': t0.p_size, 'ps_suppkey': t0.ps_suppkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'p_partkey': t0.p_partkey, 'p_size': t0.p_size, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'p_partkey': t0.p_partkey, 'p_size': t0.p_size, 'ps_suppkey': t1.ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) diff --git a/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql index 1126345c2..66bcfbae9 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql @@ -14,8 +14,8 @@ FROM main.cars AS cars JOIN _t2 AS _t2 ON _t2.car_id = cars._id LEFT JOIN main.sales AS sales - ON cars._id = sales.car_id + ON _t2.car_id = sales.car_id GROUP BY - cars._id + _t2.car_id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql index dbae4e04f..ee2e74f1e 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql @@ -13,8 +13,8 @@ FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id LEFT JOIN main.sales AS sales - ON 
cars._id = sales.car_id + ON _t.car_id = sales.car_id GROUP BY - cars._id + _t.car_id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql index afd34c12e..60e75c478 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql @@ -13,8 +13,8 @@ FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id LEFT JOIN main.sales AS sales - ON cars._id = sales.car_id + ON _t.car_id = sales.car_id GROUP BY - cars._id + _t.car_id ORDER BY 3 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql index 10d15777f..1174a6185 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql @@ -14,8 +14,8 @@ FROM main.cars AS cars JOIN _t2 AS _t2 ON _t2.car_id = cars._id LEFT JOIN main.sales AS sales - ON cars._id = sales.car_id + ON _t2.car_id = sales.car_id GROUP BY - cars._id + _t2.car_id ORDER BY 3 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql index 0d704d83a..c7aa0e56a 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql @@ -13,8 +13,8 @@ FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id LEFT JOIN main.sales AS sales - ON cars._id = sales.car_id + ON _t.car_id = sales.car_id GROUP BY - cars._id + _t.car_id ORDER BY 3 DESC From 6b4313f5318b10a61debea54579ee2957c281ae7 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 31 Oct 2025 06:27:14 -0400 Subject: [PATCH 129/143] Updating plan files --- tests/test_plan_refsols/aggregate_anti.txt | 2 +- 
tests/test_plan_refsols/aggregate_semi.txt | 8 ++++---- tests/test_plan_refsols/aggregation_analytics_2.txt | 2 +- tests/test_plan_refsols/aggregation_analytics_3.txt | 2 +- tests/test_plan_refsols/anti_aggregate.txt | 2 +- tests/test_plan_refsols/anti_aggregate_alternate.txt | 2 +- tests/test_plan_refsols/common_prefix_ad.txt | 2 +- tests/test_plan_refsols/common_prefix_ap.txt | 6 +++--- tests/test_plan_refsols/common_prefix_aq.txt | 2 +- tests/test_plan_refsols/common_prefix_l.txt | 2 +- tests/test_plan_refsols/common_prefix_m.txt | 2 +- tests/test_plan_refsols/correl_14.txt | 8 ++++---- tests/test_plan_refsols/correl_15.txt | 8 ++++---- .../lines_german_supplier_economy_part.txt | 2 +- tests/test_plan_refsols/multiple_has_hasnot.txt | 12 ++++++------ .../rank_parts_per_supplier_region_by_size.txt | 2 +- tests/test_plan_refsols/semi_aggregate.txt | 8 ++++---- tests/test_plan_refsols/simple_anti_2.txt | 2 +- tests/test_plan_refsols/simple_semi_2.txt | 2 +- tests/test_plan_refsols/singular7.txt | 2 +- tests/test_plan_refsols/tpch_q2.txt | 2 +- tests/test_sql_refsols/correl_14_sqlite.sql | 9 ++++++--- tests/test_sql_refsols/correl_15_sqlite.sql | 9 ++++++--- 23 files changed, 52 insertions(+), 46 deletions(-) diff --git a/tests/test_plan_refsols/aggregate_anti.txt b/tests/test_plan_refsols/aggregate_anti.txt index c61c67b94..8d4b6d5bc 100644 --- a/tests/test_plan_refsols/aggregate_anti.txt +++ b/tests/test_plan_refsols/aggregate_anti.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == 
t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 18f353c33..6c99fbc08 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) +ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_expr), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_expr': t1.sum_expr, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index b719a64e1..2ef53d9c2 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': 
t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index f29c91858..339aaf5d5 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey 
== t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/anti_aggregate.txt b/tests/test_plan_refsols/anti_aggregate.txt index c61c67b94..8d4b6d5bc 100644 --- a/tests/test_plan_refsols/anti_aggregate.txt +++ b/tests/test_plan_refsols/anti_aggregate.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/anti_aggregate_alternate.txt b/tests/test_plan_refsols/anti_aggregate_alternate.txt index c12bdd20e..a273602c6 100644 --- a/tests/test_plan_refsols/anti_aggregate_alternate.txt +++ b/tests/test_plan_refsols/anti_aggregate_alternate.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', 0:numeric), ('sum_price_of_10parts', None:unknown)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 022d52ee5..2312171f5 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -7,7 +7,7 @@ ROOT(columns=[('supplier_name', anything_s_name), ('part_name', anything_p_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, 
columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_ap.txt b/tests/test_plan_refsols/common_prefix_ap.txt index 18f665258..43b28f45b 100644 --- a/tests/test_plan_refsols/common_prefix_ap.txt +++ b/tests/test_plan_refsols/common_prefix_ap.txt @@ -1,10 +1,10 @@ ROOT(columns=[('part_name', p_name), ('supplier_name', s_name), ('supplier_quantity', ps_availqty), ('supplier_nation', n_name)], orderings=[(p_name):asc_first]) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=p_brand == 'Brand#32':string & p_size == 10:numeric & CONTAINS(p_name, 'pink':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey, 'p_size': p_size}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) FILTER(condition=RANKING(args=[], partition=[ps_partkey], 
order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index 32e6086dd..26ca70545 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ b/tests/test_plan_refsols/common_prefix_aq.txt @@ -7,7 +7,7 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier' JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 01e756d5e..7694c7141 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -12,7 +12,7 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppl SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 7d1b191be..ba4252b2b 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ 
b/tests/test_plan_refsols/common_prefix_m.txt @@ -10,7 +10,7 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 9fe75904a..125f405ef 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,12 +1,12 @@ ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.avg_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) + 
JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_expr': t0.sum_expr, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 2f8b7cb03..4b184d5bf 100644 --- 
a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,15 +1,15 @@ ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'supplier_avg_price': t0.supplier_avg_price}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'supplier_avg_price': AVG(p_retailprice)}) + JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_expr': t0.sum_expr, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) 
JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt index 1987fec13..1d9130f74 100644 --- a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt +++ b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt @@ -8,7 +8,7 @@ ROOT(columns=[('order_key', l_orderkey), ('ship_date', l_shipdate), ('extended_p SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 
'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=STARTSWITH(p_type, 'ECONOMY':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/multiple_has_hasnot.txt b/tests/test_plan_refsols/multiple_has_hasnot.txt index 78b9d31fd..cf25533fb 100644 --- a/tests/test_plan_refsols/multiple_has_hasnot.txt +++ b/tests/test_plan_refsols/multiple_has_hasnot.txt @@ -3,20 +3,20 @@ ROOT(columns=[('name', p_name)], orderings=[]) JOIN(condition=t0.p_partkey == t1.ps_partkey, type=ANTI, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'ARGENTINA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt index 0cbd4c887..0e4ebd6a9 100644 --- a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt +++ b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True))], orderings=[(p_partkey):asc_first], limit=15:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, 
cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt b/tests/test_plan_refsols/semi_aggregate.txt index 18f353c33..6c99fbc08 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) +ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_expr), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_expr': t1.sum_expr, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_anti_2.txt b/tests/test_plan_refsols/simple_anti_2.txt index b87256acc..081922387 100644 --- a/tests/test_plan_refsols/simple_anti_2.txt +++ b/tests/test_plan_refsols/simple_anti_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size < 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_semi_2.txt b/tests/test_plan_refsols/simple_semi_2.txt index d52362ab1..e3f041ae8 100644 --- a/tests/test_plan_refsols/simple_semi_2.txt +++ b/tests/test_plan_refsols/simple_semi_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, 
type=SEMI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size < 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 8251cf845..857976233 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -6,7 +6,7 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_ord FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey != 0:numeric), 0:numeric)):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'count_l_suppkey': count_l_suppkey, 'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'l_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'count_l_suppkey': COUNT(l_suppkey)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index e097c9898..5f3a777cd 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), ('P_PARTKEY', p_partkey), ('P_MFGR', p_mfgr), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('S_COMMENT', s_comment)], orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first], limit=10:numeric) FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_supplycost):asc_last], allow_ties=True) == 1:numeric, columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, 
cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index 011898756..cc4c44913 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,7 +1,8 @@ WITH _s4 AS ( SELECT partsupp.ps_suppkey, - AVG(part.p_retailprice) AS avg_p_retailprice + SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr, + SUM(part.p_retailprice) AS sum_p_retailprice FROM tpch.supplier AS supplier JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey @@ -18,9 +19,11 @@ FROM _s4 AS _s4 JOIN tpch.partsupp AS partsupp ON _s4.ps_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part - ON _s4.avg_p_retailprice > part.p_retailprice - AND part.p_container = 'LG DRUM' + ON part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey + AND part.p_retailprice < ( + CAST(_s4.sum_p_retailprice AS REAL) / _s4.sum_expr + ) AND part.p_retailprice < ( partsupp.ps_supplycost * 1.5 ) diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index f65eaa980..e93da2ce7 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -6,7 +6,8 @@ WITH _s0 AS ( SELECT partsupp.ps_suppkey, MAX(_s0.avg_p_retailprice) AS anything_avg_p_retailprice, - AVG(part.p_retailprice) AS supplier_avg_price + SUM(IIF(NOT part.p_retailprice IS 
NULL, 1, 0)) AS sum_expr, + SUM(part.p_retailprice) AS sum_p_retailprice FROM _s0 AS _s0 JOIN tpch.supplier AS supplier ON supplier.s_acctbal < 1000 AND supplier.s_nationkey = 19 @@ -23,9 +24,11 @@ FROM _s6 AS _s6 JOIN tpch.partsupp AS partsupp ON _s6.ps_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part - ON _s6.supplier_avg_price > part.p_retailprice - AND part.p_container = 'LG DRUM' + ON part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey + AND part.p_retailprice < ( + CAST(_s6.sum_p_retailprice AS REAL) / _s6.sum_expr + ) AND part.p_retailprice < ( _s6.anything_avg_p_retailprice * 0.85 ) From d4394348cbf5d2f416d5f4a31e5e42aa2e617c07 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 31 Oct 2025 19:39:19 -0400 Subject: [PATCH 130/143] Moved around join key substitution invocation --- pydough/conversion/relational_converter.py | 6 ++++++ tests/test_plan_refsols/correl_19.txt | 5 ++--- .../wdi_low_income_country_with_series.txt | 4 +--- tests/test_sql_refsols/correl_19_sqlite.sql | 6 +++--- .../wdi_low_income_country_with_series_ansi.sql | 4 +--- .../wdi_low_income_country_with_series_mysql.sql | 9 ++++----- .../wdi_low_income_country_with_series_postgres.sql | 9 ++++----- .../wdi_low_income_country_with_series_snowflake.sql | 9 ++++----- .../wdi_low_income_country_with_series_sqlite.sql | 9 ++++----- 9 files changed, 29 insertions(+), 32 deletions(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 76e88e005..c0d12829e 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1547,6 +1547,12 @@ def optimize_relational_tree( pruner: ColumnPruner = ColumnPruner() root = pruner.prune_unused_columns(root) + # Run a pass that substitutes join keys when the only columns used by one + # side of the join are the join keys. This will make some joins redundant + # and allow them to be deleted later. Then, re-run column pruning. 
+ root = confirm_root(join_key_substitution(root)) + root = pruner.prune_unused_columns(root) + # Bubble up names from the leaf nodes to further encourage simpler naming # without aliases, and also to delete duplicate columns where possible. # This is done early to maximize the chances that a nicer name will be used diff --git a/tests/test_plan_refsols/correl_19.txt b/tests/test_plan_refsols/correl_19.txt index c838204a3..88c6c172d 100644 --- a/tests/test_plan_refsols/correl_19.txt +++ b/tests/test_plan_refsols/correl_19.txt @@ -1,7 +1,6 @@ ROOT(columns=[('supplier_name', anything_s_name), ('n_super_cust', n_rows)], orderings=[(n_rows):desc_last], limit=5:numeric) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + FILTER(condition=s_nationkey == s_nationkey, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/wdi_low_income_country_with_series.txt 
b/tests/test_plan_refsols/wdi_low_income_country_with_series.txt index eb6f2d707..a49f09263 100644 --- a/tests/test_plan_refsols/wdi_low_income_country_with_series.txt +++ b/tests/test_plan_refsols/wdi_low_income_country_with_series.txt @@ -2,7 +2,5 @@ ROOT(columns=[('country_code', CountryCode)], orderings=[]) JOIN(condition=t0.CountryCode == t1.Countrycode, type=SEMI, columns={'CountryCode': t0.CountryCode}) FILTER(condition=IncomeGroup == 'Low income':string, columns={'CountryCode': CountryCode}) SCAN(table=wdi.Country, columns={'CountryCode': CountryCode, 'IncomeGroup': IncomeGroup}) - JOIN(condition=t0.Seriescode == t1.SeriesCode, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'Countrycode': t0.Countrycode}) + FILTER(condition=Seriescode == 'DT.DOD.DECT.CD':string, columns={'Countrycode': Countrycode}) SCAN(table=wdi.CountryNotes, columns={'Countrycode': Countrycode, 'Seriescode': Seriescode}) - FILTER(condition=SeriesCode == 'DT.DOD.DECT.CD':string, columns={'SeriesCode': SeriesCode}) - SCAN(table=wdi.Series, columns={'SeriesCode': SeriesCode}) diff --git a/tests/test_sql_refsols/correl_19_sqlite.sql b/tests/test_sql_refsols/correl_19_sqlite.sql index 8f1892961..59e5c7059 100644 --- a/tests/test_sql_refsols/correl_19_sqlite.sql +++ b/tests/test_sql_refsols/correl_19_sqlite.sql @@ -2,11 +2,11 @@ SELECT MAX(supplier.s_name) AS supplier_name, COUNT(*) AS n_super_cust FROM tpch.supplier AS supplier -JOIN tpch.nation AS nation - ON nation.n_nationkey = supplier.s_nationkey JOIN tpch.customer AS customer ON customer.c_acctbal > supplier.s_acctbal - AND customer.c_nationkey = nation.n_nationkey + AND customer.c_nationkey = supplier.s_nationkey +WHERE + supplier.s_nationkey = supplier.s_nationkey GROUP BY supplier.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql index 0faaf80d5..01186260c 100644 --- 
a/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql @@ -3,8 +3,6 @@ SELECT FROM wdi.country AS country JOIN wdi.countrynotes AS countrynotes ON country.countrycode = countrynotes.countrycode -JOIN wdi.series AS series - ON countrynotes.seriescode = series.seriescode - AND series.seriescode = 'DT.DOD.DECT.CD' + AND countrynotes.seriescode = 'DT.DOD.DECT.CD' WHERE country.incomegroup = 'Low income' diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql index 64ea58619..0d894947d 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql @@ -1,10 +1,9 @@ WITH _u_0 AS ( SELECT - CountryNotes.countrycode AS _u_1 - FROM wdi.CountryNotes AS CountryNotes - JOIN wdi.Series AS Series - ON CountryNotes.seriescode = Series.seriescode - AND Series.seriescode = 'DT.DOD.DECT.CD' + countrycode AS _u_1 + FROM wdi.CountryNotes + WHERE + seriescode = 'DT.DOD.DECT.CD' GROUP BY 1 ) diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql index 9315d0277..5e5f87dfc 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql @@ -1,10 +1,9 @@ WITH _u_0 AS ( SELECT - countrynotes.countrycode AS _u_1 - FROM wdi.countrynotes AS countrynotes - JOIN wdi.series AS series - ON countrynotes.seriescode = series.seriescode - AND series.seriescode = 'DT.DOD.DECT.CD' + countrycode AS _u_1 + FROM wdi.countrynotes + WHERE + seriescode = 'DT.DOD.DECT.CD' GROUP BY 1 ) diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql index 
9315d0277..5e5f87dfc 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql @@ -1,10 +1,9 @@ WITH _u_0 AS ( SELECT - countrynotes.countrycode AS _u_1 - FROM wdi.countrynotes AS countrynotes - JOIN wdi.series AS series - ON countrynotes.seriescode = series.seriescode - AND series.seriescode = 'DT.DOD.DECT.CD' + countrycode AS _u_1 + FROM wdi.countrynotes + WHERE + seriescode = 'DT.DOD.DECT.CD' GROUP BY 1 ) diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql index 9315d0277..5e5f87dfc 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql @@ -1,10 +1,9 @@ WITH _u_0 AS ( SELECT - countrynotes.countrycode AS _u_1 - FROM wdi.countrynotes AS countrynotes - JOIN wdi.series AS series - ON countrynotes.seriescode = series.seriescode - AND series.seriescode = 'DT.DOD.DECT.CD' + countrycode AS _u_1 + FROM wdi.countrynotes + WHERE + seriescode = 'DT.DOD.DECT.CD' GROUP BY 1 ) From 844eb3d00ca414be1b5eef7f1feef02d0ea13765 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 31 Oct 2025 20:11:52 -0400 Subject: [PATCH 131/143] WIP singular/reverse revisions --- pydough/conversion/hybrid_tree.py | 7 ++++++- pydough/conversion/relational_simplification.py | 10 ++++++++-- tests/test_plan_refsols/correl_19.txt | 3 +-- tests/test_plan_refsols/epoch_num_predawn_cold_war.txt | 6 +++--- tests/test_sql_refsols/correl_19_sqlite.sql | 2 -- .../epoch_num_predawn_cold_war_ansi.sql | 2 +- .../epoch_num_predawn_cold_war_mysql.sql | 2 +- .../epoch_num_predawn_cold_war_postgres.sql | 2 +- .../epoch_num_predawn_cold_war_snowflake.sql | 2 +- .../epoch_num_predawn_cold_war_sqlite.sql | 2 +- 10 files changed, 23 insertions(+), 15 deletions(-) diff --git a/pydough/conversion/hybrid_tree.py 
b/pydough/conversion/hybrid_tree.py index 2f872229c..5e0ab2c90 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -853,7 +853,12 @@ def is_singular(self) -> bool: case _: return False # The current level is fine, so check any levels above it next. - return True if self.parent is None else self.parent.always_exists() + return True if self.parent is None else self.parent.is_singular() + + # def is_singular_reverse(self) -> bool: + # """ + # TODO + # """ def equals_ignoring_successors(self, other: "HybridTree") -> bool: """ diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 33410ea1f..54ca408af 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -1282,8 +1282,14 @@ def simplify_function_call( ) case _: - # All other cases remain non-simplified. - pass + # Simplify comparing an expression to itself as + # True/False. All other cases remain non-simplified. 
+ if expr.inputs[0] == expr.inputs[1]: + is_eq: bool = expr.op in (pydop.EQU, pydop.LEQ, pydop.GEQ) + output_expr = LiteralExpression(is_eq, expr.data_type) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=is_eq + ) output_predicates.not_negative = True diff --git a/tests/test_plan_refsols/correl_19.txt b/tests/test_plan_refsols/correl_19.txt index 88c6c172d..ede8c4337 100644 --- a/tests/test_plan_refsols/correl_19.txt +++ b/tests/test_plan_refsols/correl_19.txt @@ -1,6 +1,5 @@ ROOT(columns=[('supplier_name', anything_s_name), ('n_super_cust', n_rows)], orderings=[(n_rows):desc_last], limit=5:numeric) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT()}) JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - FILTER(condition=s_nationkey == s_nationkey, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt index 077f041bc..f465f4290 100644 --- a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt +++ b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('n_events', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, columns={}) +ROOT(columns=[('n_events', ndistinct_ev_key)], orderings=[]) 
+ AGGREGATE(keys={}, aggregations={'ndistinct_ev_key': NDISTINCT(ev_key)}) + JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, columns={'ev_key': t0.ev_key}) JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=t_name == 'Pre-Dawn':string, columns={'t_end_hour': t_end_hour, 't_start_hour': t_start_hour}) diff --git a/tests/test_sql_refsols/correl_19_sqlite.sql b/tests/test_sql_refsols/correl_19_sqlite.sql index 59e5c7059..795361957 100644 --- a/tests/test_sql_refsols/correl_19_sqlite.sql +++ b/tests/test_sql_refsols/correl_19_sqlite.sql @@ -5,8 +5,6 @@ FROM tpch.supplier AS supplier JOIN tpch.customer AS customer ON customer.c_acctbal > supplier.s_acctbal AND customer.c_nationkey = supplier.s_nationkey -WHERE - supplier.s_nationkey = supplier.s_nationkey GROUP BY supplier.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_ansi.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_ansi.sql index a37de8ff0..c23e12993 100644 --- a/tests/test_sql_refsols/epoch_num_predawn_cold_war_ansi.sql +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_ansi.sql @@ -5,7 +5,7 @@ WITH _s0 AS ( FROM events ) SELECT - COUNT(*) AS n_events + COUNT(DISTINCT _s0.ev_key) AS n_events FROM _s0 AS _s0 JOIN times AS times ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s0.ev_dt AS DATETIME)) diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_mysql.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_mysql.sql index efc1d7023..80a0ac2b4 100644 --- a/tests/test_sql_refsols/epoch_num_predawn_cold_war_mysql.sql +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_mysql.sql @@ -15,7 +15,7 @@ WITH _s0 AS ( 1 ) SELECT - COUNT(*) AS n_events + COUNT(DISTINCT _s0.ev_key) AS n_events FROM _s0 AS _s0 JOIN TIMES AS TIMES ON TIMES.t_end_hour 
> HOUR(_s0.ev_dt) diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_postgres.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_postgres.sql index c1deb50ea..0c4383f57 100644 --- a/tests/test_sql_refsols/epoch_num_predawn_cold_war_postgres.sql +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_postgres.sql @@ -15,7 +15,7 @@ WITH _s0 AS ( 1 ) SELECT - COUNT(*) AS n_events + COUNT(DISTINCT _s0.ev_key) AS n_events FROM _s0 AS _s0 JOIN times AS times ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s0.ev_dt AS TIMESTAMP)) diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql index 5e8914950..5aae6c921 100644 --- a/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql @@ -15,7 +15,7 @@ WITH _s0 AS ( 1 ) SELECT - COUNT(*) AS n_events + COUNT(DISTINCT _s0.ev_key) AS n_events FROM _s0 AS _s0 JOIN times AS times ON times.t_end_hour > HOUR(CAST(_s0.ev_dt AS TIMESTAMP)) diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_sqlite.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_sqlite.sql index db07934cd..4562f3a60 100644 --- a/tests/test_sql_refsols/epoch_num_predawn_cold_war_sqlite.sql +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_sqlite.sql @@ -15,7 +15,7 @@ WITH _s0 AS ( 1 ) SELECT - COUNT(*) AS n_events + COUNT(DISTINCT _s0.ev_key) AS n_events FROM _s0 AS _s0 JOIN times AS times ON times.t_end_hour > CAST(STRFTIME('%H', _s0.ev_dt) AS INTEGER) From a13bb6af11b23190650442a1627dbcfc62973936 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 3 Nov 2025 13:15:09 -0500 Subject: [PATCH 132/143] Fixing bug with the substitution going right-to-left --- pydough/conversion/join_key_substitution.py | 2 +- .../aggregate_mixed_levels_simple.txt | 12 +++---- .../aggregate_on_function_call.txt | 8 ++--- .../aggregate_then_backref.txt | 8 ++--- 
tests/test_plan_refsols/correl_29.txt | 12 +++---- ...multiple_subcollections_alongside_aggs.txt | 10 +++--- .../count_single_subcollection.txt | 8 ++--- .../cryptbank_agg_04_raw.txt | 8 ++--- .../cryptbank_agg_04_rewrite.txt | 8 ++--- .../test_plan_refsols/deep_best_analysis.txt | 24 ++++++------- .../multi_partition_access_2.txt | 35 +++++++------------ ...ple_simple_aggregations_multiple_calcs.txt | 10 +++--- ...ltiple_simple_aggregations_single_calc.txt | 10 +++--- .../orders_sum_line_price.txt | 8 ++--- .../orders_sum_vs_count_line_price.txt | 8 ++--- .../test_plan_refsols/supplier_best_part.txt | 14 ++++---- tests/test_sql_refsols/correl_29_sqlite.sql | 28 +++++++-------- .../cryptbank_agg_04_raw_sqlite.sql | 10 +++--- .../cryptbank_agg_04_rewrite_sqlite.sql | 10 +++--- .../defog_dealership_adv6_ansi.sql | 4 +-- .../defog_dealership_adv6_mysql.sql | 4 +-- .../defog_dealership_adv6_postgres.sql | 4 +-- .../defog_dealership_adv6_snowflake.sql | 4 +-- .../defog_dealership_adv6_sqlite.sql | 4 +-- .../defog_ewallet_adv6_ansi.sql | 8 ++--- .../defog_ewallet_adv6_mysql.sql | 10 +++--- .../defog_ewallet_adv6_postgres.sql | 10 +++--- .../defog_ewallet_adv6_snowflake.sql | 8 ++--- .../defog_ewallet_adv6_sqlite.sql | 10 +++--- 29 files changed, 124 insertions(+), 175 deletions(-) diff --git a/pydough/conversion/join_key_substitution.py b/pydough/conversion/join_key_substitution.py index 55f839019..7b8f022d2 100644 --- a/pydough/conversion/join_key_substitution.py +++ b/pydough/conversion/join_key_substitution.py @@ -60,7 +60,7 @@ def visit_join(self, join: Join) -> RelationalNode: # substitute the left join keys with the right join keys. 
elif ( join.reverse_cardinality == JoinCardinality.SINGULAR_ACCESS - and rhs_refs <= lhs_keys + and lhs_refs <= lhs_keys ): for lhs_key, rhs_key in zip(lhs_keys_list, rhs_keys_list): join_substitution[lhs_key] = rhs_key diff --git a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt index 0b267e0e1..c48246b61 100644 --- a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt +++ b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt @@ -1,7 +1,5 @@ -ROOT(columns=[('order_key', o_orderkey), ('max_ratio', max_ratio)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) +ROOT(columns=[('order_key', l_orderkey), ('max_ratio', max_ratio)], orderings=[]) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': 
l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_on_function_call.txt b/tests/test_plan_refsols/aggregate_on_function_call.txt index 54eca15c5..5313266ab 100644 --- a/tests/test_plan_refsols/aggregate_on_function_call.txt +++ b/tests/test_plan_refsols/aggregate_on_function_call.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('nation_name', n_nationkey), ('avg_consumer_value', max_expr)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_expr': t1.max_expr, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_expr': MAX(IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal))}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) +ROOT(columns=[('nation_name', c_nationkey), ('avg_consumer_value', avg_consumer_value)], orderings=[]) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_consumer_value': MAX(IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal))}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index 7c8d13cc8..d041361c5 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,7 +1,5 @@ ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) + JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 72f58693b..00a187e5f 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,6 +1,6 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_name', anything_anything_anything_n_name), ('n_above_avg_customers', anything_anything_n_rows), ('n_above_avg_suppliers', agg_3_14), ('min_cust_acctbal', anything_min_c_acctbal), ('max_cust_acctbal', anything_max_c_acctbal)], orderings=[(anything_anything_anything_n_regionkey):asc_first, (anything_anything_anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': 
ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'anything_anything_n_regionkey': t0.anything_anything_n_regionkey, 'anything_n_rows': t0.anything_n_rows, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_nationkey': t1.n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) + JOIN(condition=t0.c_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'anything_anything_n_regionkey': t0.anything_anything_n_regionkey, 'anything_n_rows': t0.anything_n_rows, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 's_nationkey': t1.s_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': 
t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) @@ -12,9 +12,7 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_n SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_nationkey': t0.s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt index 7f2716e17..41716ca66 100644 --- 
a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt +++ b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt @@ -1,8 +1,6 @@ -ROOT(columns=[('nation_name', n_nationkey), ('num_customers', n_rows), ('num_suppliers', agg_3), ('customer_to_supplier_wealth_ratio', DEFAULT_TO(sum_c_acctbal, 0:numeric) / DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_3': t1.n_rows, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) +ROOT(columns=[('nation_name', c_nationkey), ('num_customers', n_rows), ('num_suppliers', agg_3), ('customer_to_supplier_wealth_ratio', DEFAULT_TO(sum_c_acctbal, 0:numeric) / DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) + JOIN(condition=t0.c_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_3': t1.n_rows, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) 
SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/count_single_subcollection.txt b/tests/test_plan_refsols/count_single_subcollection.txt index 9f3091e89..2618ad06c 100644 --- a/tests/test_plan_refsols/count_single_subcollection.txt +++ b/tests/test_plan_refsols/count_single_subcollection.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('nation_name', n_nationkey), ('num_customers', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) +ROOT(columns=[('nation_name', c_nationkey), ('num_customers', n_rows)], orderings=[]) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/cryptbank_agg_04_raw.txt b/tests/test_plan_refsols/cryptbank_agg_04_raw.txt index 6e2aa056c..673c8b1d2 100644 --- a/tests/test_plan_refsols/cryptbank_agg_04_raw.txt +++ b/tests/test_plan_refsols/cryptbank_agg_04_raw.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('branch_key', b_key), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_unmask_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_unmask_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[]) - JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'b_key': t0.b_key, 'sum_unmask_a_balance': t1.sum_unmask_a_balance}) - SCAN(table=CRBNK.BRANCHES, columns={'b_key': b_key}) - AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_unmask_a_balance': SUM(UNMASK::(SQRT([a_balance])))}) - 
SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey}) +ROOT(columns=[('branch_key', a_branchkey), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_unmask_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_unmask_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[]) + AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_unmask_a_balance': SUM(UNMASK::(SQRT([a_balance])))}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey}) diff --git a/tests/test_plan_refsols/cryptbank_agg_04_rewrite.txt b/tests/test_plan_refsols/cryptbank_agg_04_rewrite.txt index 6e2aa056c..673c8b1d2 100644 --- a/tests/test_plan_refsols/cryptbank_agg_04_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_agg_04_rewrite.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('branch_key', b_key), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_unmask_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_unmask_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[]) - JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'b_key': t0.b_key, 'sum_unmask_a_balance': t1.sum_unmask_a_balance}) - SCAN(table=CRBNK.BRANCHES, columns={'b_key': b_key}) - AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_unmask_a_balance': SUM(UNMASK::(SQRT([a_balance])))}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey}) +ROOT(columns=[('branch_key', a_branchkey), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_unmask_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_unmask_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[]) + AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_unmask_a_balance': SUM(UNMASK::(SQRT([a_balance])))}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey}) diff --git 
a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index a0c1f8f96..a9a88b56e 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,26 +1,22 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', key_5), ('c_bal', c_acctbal), ('cr_bal', account_balance_13), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', c_custkey)], orderings=[(n_name):asc_first], limit=10:numeric) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': 
t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=n_nationkey == s_nationkey & RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_regionkey': t0.r_regionkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=n_nationkey == s_nationkey & RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 's_suppkey': s_suppkey}) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 132528e7a..cec6ecabc 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,30 +1,19 @@ -ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', sum_sum_sbTxShares_1 / sum_count_sbTxShares_1), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': 
t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares_1 / t0.sum_count_sbTxShares_1 & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t1.sum_sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sum_count_sbTxShares': 
t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) +ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', cust_tick_avg_shares), ('cust_avg_shares', sum_sbTxShares / count_sbTxShares)], orderings=[(sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sbTxShares / t0.count_sbTxShares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': 
t1.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, 
columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_sbTxShares': t1.sum_sbTxShares}) - AGGREGATE(keys={'sbTxCustId': 
sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'avg_sbTxShares': AVG(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'avg_sbTxShares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index ad87610a6..684efa27a 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,8 +1,6 @@ -ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == 
t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) +ROOT(columns=[('nation_name', c_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) + JOIN(condition=t0.c_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal, 'c_nationkey': t0.c_nationkey, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': 
AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt index 94e0925e0..d3daca766 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt @@ -1,8 +1,6 @@ -ROOT(columns=[('nation_name', n_nationkey), ('consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('producer_value', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) +ROOT(columns=[('nation_name', c_nationkey), ('consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('producer_value', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) + JOIN(condition=t0.c_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) 
AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/orders_sum_line_price.txt b/tests/test_plan_refsols/orders_sum_line_price.txt index 0ffd28d94..1720fa8af 100644 --- a/tests/test_plan_refsols/orders_sum_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_line_price.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('okey', o_orderkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) +ROOT(columns=[('okey', l_orderkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt index 046adabcd..2ef503a8b 100644 --- a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('okey', o_orderkey), ('lavg', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': 
t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) +ROOT(columns=[('okey', l_orderkey), ('lavg', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice)], orderings=[]) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index a63790075..4b815e991 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -1,14 +1,12 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('n_shipments', n_rows)], orderings=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (s_name):asc_first], limit=3:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 
's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + FILTER(condition=RANKING(args=[], partition=[l_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'l_suppkey': l_suppkey, 'n_rows': n_rows, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_name': t1.p_name, 
'sum_l_quantity': t0.sum_l_quantity}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_sql_refsols/correl_29_sqlite.sql b/tests/test_sql_refsols/correl_29_sqlite.sql index 5ec76c6d8..964e96797 100644 --- a/tests/test_sql_refsols/correl_29_sqlite.sql +++ b/tests/test_sql_refsols/correl_29_sqlite.sql @@ -23,7 +23,7 @@ WITH _t5 AS ( ON _s1.avg_c_acctbal < _s3.c_acctbal AND _s3.c_nationkey = nation.n_nationkey GROUP BY 1 -), _s10 AS ( +), _s8 AS ( SELECT _s5.c_nationkey, MAX(_t3.anything_n_name) AS anything_anything_n_name, @@ -43,7 +43,7 @@ WITH _t5 AS ( s_acctbal, s_nationkey FROM tpch.supplier -), _s7 AS ( +), _s6 AS ( SELECT s_nationkey, AVG(s_acctbal) AS avg_s_acctbal @@ -52,21 +52,19 @@ WITH _t5 AS ( 1 ) SELECT - MAX(_s10.anything_anything_n_regionkey) AS region_key, - MAX(_s10.anything_anything_n_name) AS nation_name, - MAX(_s10.anything_n_rows) AS n_above_avg_customers, + MAX(_s8.anything_anything_n_regionkey) AS region_key, + MAX(_s8.anything_anything_n_name) AS nation_name, + MAX(_s8.anything_n_rows) AS n_above_avg_customers, COUNT(*) AS n_above_avg_suppliers, - MAX(_s10.min_c_acctbal) AS min_cust_acctbal, - MAX(_s10.max_c_acctbal) AS max_cust_acctbal -FROM _s10 AS _s10 -JOIN tpch.nation AS nation - ON _s10.c_nationkey = nation.n_nationkey -JOIN _s7 AS _s7 - ON _s7.s_nationkey = nation.n_nationkey -JOIN _t6 AS _s9 - ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey + MAX(_s8.min_c_acctbal) AS min_cust_acctbal, + MAX(_s8.max_c_acctbal) AS 
max_cust_acctbal +FROM _s8 AS _s8 +JOIN _s6 AS _s6 + ON _s6.s_nationkey = _s8.c_nationkey +JOIN _t6 AS _s7 + ON _s6.avg_s_acctbal < _s7.s_acctbal AND _s6.s_nationkey = _s7.s_nationkey GROUP BY - nation.n_nationkey + _s6.s_nationkey ORDER BY 1, 2 diff --git a/tests/test_sql_refsols/cryptbank_agg_04_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_agg_04_raw_sqlite.sql index 992e6eccd..92217107e 100644 --- a/tests/test_sql_refsols/cryptbank_agg_04_raw_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_agg_04_raw_sqlite.sql @@ -1,4 +1,4 @@ -WITH _s1 AS ( +WITH _t0 AS ( SELECT a_branchkey, SUM(SQRT(a_balance)) AS sum_unmask_a_balance @@ -7,11 +7,9 @@ WITH _s1 AS ( 1 ) SELECT - branches.b_key AS branch_key, + a_branchkey AS branch_key, ROUND( - CAST(COALESCE(_s1.sum_unmask_a_balance, 0) AS REAL) / SUM(COALESCE(_s1.sum_unmask_a_balance, 0)) OVER (), + CAST(COALESCE(sum_unmask_a_balance, 0) AS REAL) / SUM(COALESCE(sum_unmask_a_balance, 0)) OVER (), 2 ) AS pct_total_wealth -FROM crbnk.branches AS branches -JOIN _s1 AS _s1 - ON _s1.a_branchkey = branches.b_key +FROM _t0 diff --git a/tests/test_sql_refsols/cryptbank_agg_04_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_agg_04_rewrite_sqlite.sql index 992e6eccd..92217107e 100644 --- a/tests/test_sql_refsols/cryptbank_agg_04_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_agg_04_rewrite_sqlite.sql @@ -1,4 +1,4 @@ -WITH _s1 AS ( +WITH _t0 AS ( SELECT a_branchkey, SUM(SQRT(a_balance)) AS sum_unmask_a_balance @@ -7,11 +7,9 @@ WITH _s1 AS ( 1 ) SELECT - branches.b_key AS branch_key, + a_branchkey AS branch_key, ROUND( - CAST(COALESCE(_s1.sum_unmask_a_balance, 0) AS REAL) / SUM(COALESCE(_s1.sum_unmask_a_balance, 0)) OVER (), + CAST(COALESCE(sum_unmask_a_balance, 0) AS REAL) / SUM(COALESCE(sum_unmask_a_balance, 0)) OVER (), 2 ) AS pct_total_wealth -FROM crbnk.branches AS branches -JOIN _s1 AS _s1 - ON _s1.a_branchkey = branches.b_key +FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql 
b/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql index 66bcfbae9..1126345c2 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_ansi.sql @@ -14,8 +14,8 @@ FROM main.cars AS cars JOIN _t2 AS _t2 ON _t2.car_id = cars._id LEFT JOIN main.sales AS sales - ON _t2.car_id = sales.car_id + ON cars._id = sales.car_id GROUP BY - _t2.car_id + cars._id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql index ee2e74f1e..dbae4e04f 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_mysql.sql @@ -13,8 +13,8 @@ FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id LEFT JOIN main.sales AS sales - ON _t.car_id = sales.car_id + ON cars._id = sales.car_id GROUP BY - _t.car_id + cars._id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql index 60e75c478..afd34c12e 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_postgres.sql @@ -13,8 +13,8 @@ FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id LEFT JOIN main.sales AS sales - ON _t.car_id = sales.car_id + ON cars._id = sales.car_id GROUP BY - _t.car_id + cars._id ORDER BY 3 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql index 1174a6185..10d15777f 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql @@ -14,8 +14,8 @@ FROM main.cars AS cars JOIN _t2 AS _t2 ON _t2.car_id = cars._id LEFT JOIN main.sales AS sales - ON _t2.car_id = sales.car_id + ON cars._id = sales.car_id 
GROUP BY - _t2.car_id + cars._id ORDER BY 3 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql index c7aa0e56a..0d704d83a 100644 --- a/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv6_sqlite.sql @@ -13,8 +13,8 @@ FROM main.cars AS cars JOIN _t AS _t ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id LEFT JOIN main.sales AS sales - ON _t.car_id = sales.car_id + ON cars._id = sales.car_id GROUP BY - _t.car_id + cars._id ORDER BY 3 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv6_ansi.sql index f7d720393..25888775b 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv6_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv6_ansi.sql @@ -7,8 +7,6 @@ WITH _t0 AS ( ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY updated_at DESC NULLS FIRST) = 1 ) SELECT - users.uid AS user_id, - _t0.balance AS latest_balance -FROM main.users AS users -JOIN _t0 AS _t0 - ON _t0.user_id = users.uid + user_id, + balance AS latest_balance +FROM _t0 diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv6_mysql.sql index 617ba93ba..3b104b008 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv6_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv6_mysql.sql @@ -6,8 +6,8 @@ WITH _t AS ( FROM main.wallet_user_balance_daily ) SELECT - users.uid AS user_id, - _t.balance AS latest_balance -FROM main.users AS users -JOIN _t AS _t - ON _t._w = 1 AND _t.user_id = users.uid + user_id, + balance AS latest_balance +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv6_postgres.sql index 056bbf172..1dec440e4 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv6_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv6_postgres.sql @@ -6,8 
+6,8 @@ WITH _t AS ( FROM main.wallet_user_balance_daily ) SELECT - users.uid AS user_id, - _t.balance AS latest_balance -FROM main.users AS users -JOIN _t AS _t - ON _t._w = 1 AND _t.user_id = users.uid + user_id, + balance AS latest_balance +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql index facbf73f3..8c42869d6 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql @@ -7,8 +7,6 @@ WITH _t0 AS ( ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY updated_at DESC) = 1 ) SELECT - users.uid AS user_id, - _t0.balance AS latest_balance -FROM main.users AS users -JOIN _t0 AS _t0 - ON _t0.user_id = users.uid + user_id, + balance AS latest_balance +FROM _t0 diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv6_sqlite.sql index 056bbf172..1dec440e4 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv6_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv6_sqlite.sql @@ -6,8 +6,8 @@ WITH _t AS ( FROM main.wallet_user_balance_daily ) SELECT - users.uid AS user_id, - _t.balance AS latest_balance -FROM main.users AS users -JOIN _t AS _t - ON _t._w = 1 AND _t.user_id = users.uid + user_id, + balance AS latest_balance +FROM _t +WHERE + _w = 1 From 84264cc807abc2344a544f31ea1815bd5a56bf6f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 3 Nov 2025 13:19:49 -0500 Subject: [PATCH 133/143] [RUN CI] --- pydough/conversion/hybrid_tree.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index 5e0ab2c90..be659f618 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -855,11 +855,6 @@ def is_singular(self) -> bool: # The current level is fine, so check any levels above it next. 
return True if self.parent is None else self.parent.is_singular() - # def is_singular_reverse(self) -> bool: - # """ - # TODO - # """ - def equals_ignoring_successors(self, other: "HybridTree") -> bool: """ Compares two hybrid trees without taking into account their From 0a65503b621b8cb1e062f57f873f7a7ecb6c3edf Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 3 Nov 2025 13:21:01 -0500 Subject: [PATCH 134/143] Removing dead file --- tests/test_plan_refsols/top_lineitems_info.txt | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 tests/test_plan_refsols/top_lineitems_info.txt diff --git a/tests/test_plan_refsols/top_lineitems_info.txt b/tests/test_plan_refsols/top_lineitems_info.txt deleted file mode 100644 index 4f4492cab..000000000 --- a/tests/test_plan_refsols/top_lineitems_info.txt +++ /dev/null @@ -1,11 +0,0 @@ -ROOT(columns=[('order_key', l_orderkey), ('line_number', l_linenumber), ('part_size', p_size), ('supplier_nation', s_nationkey)], orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first]) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_size': t0.p_size, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_size': t1.p_size}) - LIMIT(limit=5:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first]) - SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.ps_partkey 
== t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_size': t1.p_size, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) From 2cdf361309902aafbd464156dc56d608a057a220 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 7 Nov 2025 13:22:41 -0500 Subject: [PATCH 135/143] [RUN CI] --- pydough/conversion/join_key_substitution.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pydough/conversion/join_key_substitution.py b/pydough/conversion/join_key_substitution.py index 7b8f022d2..74f2ded1d 100644 --- a/pydough/conversion/join_key_substitution.py +++ b/pydough/conversion/join_key_substitution.py @@ -1,5 +1,7 @@ """ -TODO +Logic for switching references to join keys from one side of a join to the other +when certain conditions are met, thus allowing the join to be removed by the +column pruner. """ from pydough.relational import ( @@ -20,7 +22,7 @@ class JoinKeySubstitutionShuttle(RelationalShuttle): """ - TODO + The relational shuttle that performs join key substitution optimization. """ def visit_join(self, join: Join) -> RelationalNode: @@ -87,7 +89,13 @@ def visit_join(self, join: Join) -> RelationalNode: def join_key_substitution(root: RelationalNode) -> RelationalNode: """ - TODO + The main entry point for join key substitution optimization. + + Args: + `root`: The root of the relational tree being optimized. 
+ + Returns: + The optimized relational tree. """ shuttle: JoinKeySubstitutionShuttle = JoinKeySubstitutionShuttle() return root.accept_shuttle(shuttle) From 9133bcbc69683eb7857b8ae9c47221ef3508566f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 24 Nov 2025 10:16:41 -0800 Subject: [PATCH 136/143] Simplification revisions [RUN CI] --- .../conversion/join_aggregate_transpose.py | 40 ++++++++----- pydough/sqlglot/override_simplify.py | 58 +++++++++++++++++-- .../defog_academic_gen13_ansi.sql | 2 +- .../defog_academic_gen13_mysql.sql | 2 +- .../defog_academic_gen13_postgres.sql | 2 +- .../defog_academic_gen13_snowflake.sql | 2 +- .../defog_academic_gen13_sqlite.sql | 2 +- .../defog_broker_adv15_snowflake.sql | 2 +- .../defog_broker_adv9_snowflake.sql | 13 ++--- .../defog_dealership_adv1_snowflake.sql | 13 ++--- .../defog_ewallet_adv14_snowflake.sql | 2 +- .../defog_ewallet_adv2_snowflake.sql | 4 +- .../defog_restaurants_gen12_ansi.sql | 8 +-- .../defog_restaurants_gen12_mysql.sql | 8 +-- .../defog_restaurants_gen12_postgres.sql | 9 +-- .../defog_restaurants_gen12_snowflake.sql | 8 +-- .../defog_restaurants_gen12_sqlite.sql | 8 +-- .../defog_restaurants_gen13_ansi.sql | 8 +-- .../defog_restaurants_gen13_mysql.sql | 8 +-- .../defog_restaurants_gen13_postgres.sql | 9 +-- .../defog_restaurants_gen13_snowflake.sql | 8 +-- .../defog_restaurants_gen13_sqlite.sql | 8 +-- .../defog_restaurants_gen14_ansi.sql | 9 +-- .../defog_restaurants_gen14_mysql.sql | 9 +-- .../defog_restaurants_gen14_postgres.sql | 9 +-- .../defog_restaurants_gen14_snowflake.sql | 9 +-- .../defog_restaurants_gen14_sqlite.sql | 9 +-- .../defog_restaurants_gen15_ansi.sql | 2 +- .../defog_restaurants_gen15_mysql.sql | 2 +- .../defog_restaurants_gen15_postgres.sql | 2 +- .../defog_restaurants_gen15_snowflake.sql | 2 +- .../defog_restaurants_gen15_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q12_snowflake.sql | 4 +- 33 files changed, 118 insertions(+), 165 deletions(-) diff --git 
a/pydough/conversion/join_aggregate_transpose.py b/pydough/conversion/join_aggregate_transpose.py index fa6adbf1c..35713192d 100644 --- a/pydough/conversion/join_aggregate_transpose.py +++ b/pydough/conversion/join_aggregate_transpose.py @@ -93,15 +93,15 @@ def generate_name(self, base: str, used_names: Iterable[str]) -> str: """ if base not in used_names: return base - i = 0 + i: int = 0 while True: - name = f"{base}_{i}" + name: str = f"{base}_{i}" if name not in used_names: return name i += 1 def join_aggregate_transpose( - self, join: Join, aggregate: Aggregate, is_left: bool + self, join: Join, aggregate: Aggregate, is_left_agg: bool ) -> RelationalNode | None: """ Transposes a Join above an Aggregate into an Aggregate above a Join, @@ -111,7 +111,7 @@ def join_aggregate_transpose( Args: `join`: the Join node above the Aggregate. `aggregate`: the Aggregate node that is the left input to the Join. - `is_left`: whether the Aggregate is the left input to the Join + `is_left_agg`: whether the Aggregate is the left input to the Join (True) or the right input (False). Returns: @@ -126,12 +126,12 @@ def join_aggregate_transpose( # filtering (since the point of joining before aggregation is to reduce # the number of rows to aggregate). 
cardinality: JoinCardinality = ( - join.cardinality if is_left else join.reverse_cardinality + join.cardinality if is_left_agg else join.reverse_cardinality ) - left_join_case = ( + left_join_case: bool = ( join.join_type == JoinType.LEFT - and not is_left + and not is_left_agg and all( agg.op in JoinAggregateTransposeShuttle.left_join_case_ops for agg in aggregate.aggregations.values() @@ -144,7 +144,7 @@ def join_aggregate_transpose( if not ( ( (join.join_type == JoinType.INNER) - or (join.join_type == JoinType.SEMI and is_left) + or (join.join_type == JoinType.SEMI and is_left_agg) or left_join_case ) and cardinality.filters @@ -155,7 +155,9 @@ def join_aggregate_transpose( # The alias of the input to the join that corresponds to the # aggregate. desired_alias: str | None = ( - join.default_input_aliases[0] if is_left else join.default_input_aliases[1] + join.default_input_aliases[0] + if is_left_agg + else join.default_input_aliases[1] ) # Find all of the columns used in the join condition that come from the @@ -177,17 +179,23 @@ def join_aggregate_transpose( # Extract the join key references from both sides of the join in the # order they appear in the join condition. + agg_key_refs: list[ColumnReference] + non_agg_key_refs: list[ColumnReference] agg_key_refs, non_agg_key_refs = extract_equijoin_keys(join) - if not is_left: + if not is_left_agg: agg_key_refs, non_agg_key_refs = non_agg_key_refs, agg_key_refs # Obtain the input aliases for both sides of the join, identified with # which one belongs to the aggregate versus the other input. 
agg_alias: str | None = ( - join.default_input_aliases[0] if is_left else join.default_input_aliases[1] + join.default_input_aliases[0] + if is_left_agg + else join.default_input_aliases[1] ) non_agg_alias: str | None = ( - join.default_input_aliases[1] if is_left else join.default_input_aliases[0] + join.default_input_aliases[1] + if is_left_agg + else join.default_input_aliases[0] ) # Now that the transpose is deemed possible, if in the left join @@ -251,7 +259,7 @@ def join_aggregate_transpose( # happening before the join. new_cardinality: JoinCardinality = join.cardinality new_reverse_cardinality: JoinCardinality = join.reverse_cardinality - if is_left: + if is_left_agg: new_reverse_cardinality = new_reverse_cardinality.add_plural() else: new_cardinality = new_cardinality.add_plural() @@ -280,9 +288,11 @@ def join_aggregate_transpose( # other input to the join, as these shall be the two inputs to the new # join. agg_input: RelationalNode = aggregate.inputs[0] - non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0] + non_agg_input: RelationalNode = ( + join.inputs[1] if is_left_agg else join.inputs[0] + ) new_join_inputs: list[RelationalNode] = ( - [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] + [agg_input, non_agg_input] if is_left_agg else [non_agg_input, agg_input] ) # Start by placing all of the columns from the aggregate node's input diff --git a/pydough/sqlglot/override_simplify.py b/pydough/sqlglot/override_simplify.py index 73509630b..ff5eb3998 100644 --- a/pydough/sqlglot/override_simplify.py +++ b/pydough/sqlglot/override_simplify.py @@ -129,6 +129,7 @@ def _simplify(expression, root=True): node = rewrite_case_to_nullif(node) node = rewrite_coalesce_nullif(node) node = rewrite_sum_nullif(node) + node = rewrite_coalesce_count(node) if constant_propagation: node = propagate_constants(node, root) @@ -149,6 +150,9 @@ def _simplify(expression, root=True): node = sort_comparison(node) node = 
simplify_startswith(node) + # PyDough Change: new post-order transformations + node = rewrite_nullif_coalesce(node) + if root: expression.replace(node) return node @@ -276,7 +280,7 @@ def rewrite_coalesce_nullif(expr: exp.Expression) -> exp.Expression: """ Rewrite expressions like `COALESCE(NULLIF(x, y), z)` to `CASE WHEN x = y THEN z ELSE x END`, or if `y` and `z` are the same then - just to `x`. + just to `COALESCE(x, z)`. Args: `expr`: The expression to rewrite. @@ -296,11 +300,11 @@ def rewrite_coalesce_nullif(expr: exp.Expression) -> exp.Expression: if not isinstance(first, exp.Nullif): return expr - lhs = first.args.get("this") - rhs = first.args.get("expression") + lhs: exp.Expression = first.args.get("this") + rhs: exp.Expression = first.args.get("expression") if rhs == second: - return lhs + return exp.Coalesce(this=lhs, expressions=[second], copy=False) return exp.Case( whens=[ @@ -332,10 +336,52 @@ def rewrite_sum_nullif(expr: exp.Expression) -> exp.Expression: if not isinstance(arg, exp.Nullif): return expr - lhs = arg.args.get("this") - rhs = arg.args.get("expression") + lhs: exp.Expression = arg.args.get("this") + rhs: exp.Expression = arg.args.get("expression") if isinstance(rhs, exp.Literal) and rhs.is_number and float(rhs.this) == 0: return exp.Sum(this=lhs, copy=False) return expr + + +def rewrite_coalesce_count(expr: exp.Expression) -> exp.Expression: + """ + Rewrite `COALESCE(COUNT(x), 0)` to `COUNT(x)`, and does the same for + `COALESCE(COUNT_IF(x), 0)`. + + Args: + `expr`: The expression to rewrite. + + Returns: + The rewritten expression. + """ + if not isinstance(expr, exp.Coalesce): + return expr + + return expr.this if isinstance(expr.this, (exp.Count, exp.CountIf)) else expr + + +def rewrite_nullif_coalesce(expr: exp.Expression) -> exp.Expression: + """ + Rewrite `NULLIF(COALESCE(x, y), y)` to `NULLIF(x, y)`. + + Args: + `expr`: The expression to rewrite. + + Returns: + The rewritten expression. 
+ """ + if not isinstance(expr, exp.Nullif): + return expr + + lhs: exp.Expression = expr.args.get("this") + rhs: exp.Expression = expr.args.get("expression") + + if not isinstance(lhs, exp.Coalesce) or len(lhs.expressions) != 1: + return expr + + if lhs.expressions[0] == rhs: + return exp.Nullif(this=lhs.args.get("this"), expression=rhs, copy=False) + else: + return expr diff --git a/tests/test_sql_refsols/defog_academic_gen13_ansi.sql b/tests/test_sql_refsols/defog_academic_gen13_ansi.sql index bdcb0a27f..8a5250a76 100644 --- a/tests/test_sql_refsols/defog_academic_gen13_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen13_ansi.sql @@ -15,7 +15,7 @@ WITH _s1 AS ( ) SELECT domain.did AS domain_id, - COALESCE(_s1.n_rows, 0) / NULLIF(COALESCE(_s3.n_rows, 0), 0) AS ratio + COALESCE(_s1.n_rows, 0) / NULLIF(_s3.n_rows, 0) AS ratio FROM main.domain AS domain LEFT JOIN _s1 AS _s1 ON _s1.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen13_mysql.sql b/tests/test_sql_refsols/defog_academic_gen13_mysql.sql index bdcb0a27f..8a5250a76 100644 --- a/tests/test_sql_refsols/defog_academic_gen13_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen13_mysql.sql @@ -15,7 +15,7 @@ WITH _s1 AS ( ) SELECT domain.did AS domain_id, - COALESCE(_s1.n_rows, 0) / NULLIF(COALESCE(_s3.n_rows, 0), 0) AS ratio + COALESCE(_s1.n_rows, 0) / NULLIF(_s3.n_rows, 0) AS ratio FROM main.domain AS domain LEFT JOIN _s1 AS _s1 ON _s1.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen13_postgres.sql b/tests/test_sql_refsols/defog_academic_gen13_postgres.sql index 39f7ef9f2..a5b45bbac 100644 --- a/tests/test_sql_refsols/defog_academic_gen13_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen13_postgres.sql @@ -15,7 +15,7 @@ WITH _s1 AS ( ) SELECT domain.did AS domain_id, - CAST(COALESCE(_s1.n_rows, 0) AS DOUBLE PRECISION) / NULLIF(COALESCE(_s3.n_rows, 0), 0) AS ratio + CAST(COALESCE(_s1.n_rows, 0) AS DOUBLE PRECISION) / NULLIF(_s3.n_rows, 0) AS 
ratio FROM main.domain AS domain LEFT JOIN _s1 AS _s1 ON _s1.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen13_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen13_snowflake.sql index bdcb0a27f..8a5250a76 100644 --- a/tests/test_sql_refsols/defog_academic_gen13_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen13_snowflake.sql @@ -15,7 +15,7 @@ WITH _s1 AS ( ) SELECT domain.did AS domain_id, - COALESCE(_s1.n_rows, 0) / NULLIF(COALESCE(_s3.n_rows, 0), 0) AS ratio + COALESCE(_s1.n_rows, 0) / NULLIF(_s3.n_rows, 0) AS ratio FROM main.domain AS domain LEFT JOIN _s1 AS _s1 ON _s1.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen13_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen13_sqlite.sql index 7b31bd18b..034fbc55f 100644 --- a/tests/test_sql_refsols/defog_academic_gen13_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen13_sqlite.sql @@ -15,7 +15,7 @@ WITH _s1 AS ( ) SELECT domain.did AS domain_id, - CAST(COALESCE(_s1.n_rows, 0) AS REAL) / NULLIF(COALESCE(_s3.n_rows, 0), 0) AS ratio + CAST(COALESCE(_s1.n_rows, 0) AS REAL) / NULLIF(_s3.n_rows, 0) AS ratio FROM main.domain AS domain LEFT JOIN _s1 AS _s1 ON _s1.did = domain.did diff --git a/tests/test_sql_refsols/defog_broker_adv15_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv15_snowflake.sql index e735bcfcd..94bef5fce 100644 --- a/tests/test_sql_refsols/defog_broker_adv15_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv15_snowflake.sql @@ -1,7 +1,7 @@ SELECT sbcustcountry AS country, 100 * ( - COALESCE(COUNT_IF(sbcuststatus = 'active'), 0) / COUNT(*) + COUNT_IF(sbcuststatus = 'active') / COUNT(*) ) AS ar FROM main.sbcustomer WHERE diff --git a/tests/test_sql_refsols/defog_broker_adv9_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv9_snowflake.sql index e98831783..45c9d4417 100644 --- a/tests/test_sql_refsols/defog_broker_adv9_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv9_snowflake.sql @@ -12,14 +12,11 
@@ SELECT ) ) AS week, COUNT(*) AS num_transactions, - COALESCE( - COUNT_IF(( - ( - DAYOFWEEK(sbtransaction.sbtxdatetime) + 6 - ) % 7 - ) IN (5, 6)), - 0 - ) AS weekend_transactions + COUNT_IF(( + ( + DAYOFWEEK(sbtransaction.sbtxdatetime) + 6 + ) % 7 + ) IN (5, 6)) AS weekend_transactions FROM main.sbtransaction AS sbtransaction JOIN main.sbticker AS sbticker ON sbticker.sbtickerid = sbtransaction.sbtxtickerid diff --git a/tests/test_sql_refsols/defog_dealership_adv1_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv1_snowflake.sql index 28c10f073..4fcbd4db9 100644 --- a/tests/test_sql_refsols/defog_dealership_adv1_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv1_snowflake.sql @@ -12,14 +12,11 @@ SELECT ) ) AS payment_week, COUNT(*) AS total_payments, - COALESCE( - COUNT_IF(( - ( - DAYOFWEEK(payments_received.payment_date) + 6 - ) % 7 - ) IN (5, 6)), - 0 - ) AS weekend_payments + COUNT_IF(( + ( + DAYOFWEEK(payments_received.payment_date) + 6 + ) % 7 + ) IN (5, 6)) AS weekend_payments FROM main.payments_received AS payments_received JOIN main.sales AS sales ON payments_received.sale_id = sales._id AND sales.sale_price > 30000 diff --git a/tests/test_sql_refsols/defog_ewallet_adv14_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv14_snowflake.sql index 3cfa5fa6f..afce8a628 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv14_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv14_snowflake.sql @@ -1,5 +1,5 @@ SELECT - COALESCE(COUNT_IF(status = 'success'), 0) / COUNT(*) AS _expr0 + COUNT_IF(status = 'success') / COUNT(*) AS _expr0 FROM main.wallet_transactions_daily WHERE DATEDIFF(MONTH, CAST(created_at AS DATETIME), CURRENT_TIMESTAMP()) = 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv2_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv2_snowflake.sql index bd7a73fe3..9d45a1ed5 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv2_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv2_snowflake.sql @@ 
-12,11 +12,11 @@ SELECT ) ) AS week, COUNT(*) AS num_notifs, - COALESCE(COUNT_IF(( + COUNT_IF(( ( DAYOFWEEK(notifications.created_at) + 6 ) % 7 - ) IN (5, 6)), 0) AS weekend_notifs + ) IN (5, 6)) AS weekend_notifs FROM main.notifications AS notifications JOIN main.users AS users ON notifications.user_id = users.uid AND users.country IN ('US', 'CA') diff --git a/tests/test_sql_refsols/defog_restaurants_gen12_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen12_ansi.sql index 9afcc60a6..ece2fa784 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen12_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen12_ansi.sql @@ -1,9 +1,3 @@ SELECT - COALESCE(SUM(rating > 4.0), 0) / CASE - WHEN ( - NOT SUM(rating < 4.0) IS NULL AND SUM(rating < 4.0) <> 0 - ) - THEN COALESCE(SUM(rating < 4.0), 0) - ELSE NULL - END AS ratio + COALESCE(SUM(rating > 4.0), 0) / NULLIF(SUM(rating < 4.0), 0) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen12_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen12_mysql.sql index 9afcc60a6..ece2fa784 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen12_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen12_mysql.sql @@ -1,9 +1,3 @@ SELECT - COALESCE(SUM(rating > 4.0), 0) / CASE - WHEN ( - NOT SUM(rating < 4.0) IS NULL AND SUM(rating < 4.0) <> 0 - ) - THEN COALESCE(SUM(rating < 4.0), 0) - ELSE NULL - END AS ratio + COALESCE(SUM(rating > 4.0), 0) / NULLIF(SUM(rating < 4.0), 0) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen12_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen12_postgres.sql index fc66f22ba..c59b4a694 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen12_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen12_postgres.sql @@ -1,10 +1,3 @@ SELECT - CAST(COALESCE(SUM(CASE WHEN rating > 4.0 THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / CASE - WHEN ( - NOT SUM(CASE WHEN rating < 4.0 THEN 1 ELSE 0 END) IS 
NULL - AND SUM(CASE WHEN rating < 4.0 THEN 1 ELSE 0 END) <> 0 - ) - THEN COALESCE(SUM(CASE WHEN rating < 4.0 THEN 1 ELSE 0 END), 0) - ELSE NULL - END AS ratio + CAST(COALESCE(SUM(CASE WHEN rating > 4.0 THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / NULLIF(SUM(CASE WHEN rating < 4.0 THEN 1 ELSE 0 END), 0) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen12_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen12_snowflake.sql index 9d1bcf6ad..4b6a300ba 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen12_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen12_snowflake.sql @@ -1,9 +1,3 @@ SELECT - COALESCE(COUNT_IF(rating > 4.0), 0) / CASE - WHEN ( - COUNT_IF(rating < 4.0) <> 0 AND NOT COUNT_IF(rating < 4.0) IS NULL - ) - THEN COALESCE(COUNT_IF(rating < 4.0), 0) - ELSE NULL - END AS ratio + COUNT_IF(rating > 4.0) / NULLIF(COUNT_IF(rating < 4.0), 0) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen12_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen12_sqlite.sql index 837e07fbc..84b0a470e 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen12_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen12_sqlite.sql @@ -1,9 +1,3 @@ SELECT - CAST(COALESCE(SUM(rating > 4.0), 0) AS REAL) / CASE - WHEN ( - NOT SUM(rating < 4.0) IS NULL AND SUM(rating < 4.0) <> 0 - ) - THEN COALESCE(SUM(rating < 4.0), 0) - ELSE NULL - END AS ratio + CAST(COALESCE(SUM(rating > 4.0), 0) AS REAL) / NULLIF(SUM(rating < 4.0), 0) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen13_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen13_ansi.sql index e242399d5..7742fc35b 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen13_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen13_ansi.sql @@ -1,11 +1,5 @@ SELECT - COALESCE(SUM(rating > 4.0), 0) / CASE - WHEN ( - NOT SUM(rating < 4.0) IS NULL AND SUM(rating < 4.0) <> 0 - ) - THEN 
COALESCE(SUM(rating < 4.0), 0) - ELSE NULL - END AS ratio + COALESCE(SUM(rating > 4.0), 0) / NULLIF(SUM(rating < 4.0), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'new york' diff --git a/tests/test_sql_refsols/defog_restaurants_gen13_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen13_mysql.sql index e242399d5..7742fc35b 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen13_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen13_mysql.sql @@ -1,11 +1,5 @@ SELECT - COALESCE(SUM(rating > 4.0), 0) / CASE - WHEN ( - NOT SUM(rating < 4.0) IS NULL AND SUM(rating < 4.0) <> 0 - ) - THEN COALESCE(SUM(rating < 4.0), 0) - ELSE NULL - END AS ratio + COALESCE(SUM(rating > 4.0), 0) / NULLIF(SUM(rating < 4.0), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'new york' diff --git a/tests/test_sql_refsols/defog_restaurants_gen13_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen13_postgres.sql index 7d950c523..7433576cd 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen13_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen13_postgres.sql @@ -1,12 +1,5 @@ SELECT - CAST(COALESCE(SUM(CASE WHEN rating > 4.0 THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / CASE - WHEN ( - NOT SUM(CASE WHEN rating < 4.0 THEN 1 ELSE 0 END) IS NULL - AND SUM(CASE WHEN rating < 4.0 THEN 1 ELSE 0 END) <> 0 - ) - THEN COALESCE(SUM(CASE WHEN rating < 4.0 THEN 1 ELSE 0 END), 0) - ELSE NULL - END AS ratio + CAST(COALESCE(SUM(CASE WHEN rating > 4.0 THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / NULLIF(SUM(CASE WHEN rating < 4.0 THEN 1 ELSE 0 END), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'new york' diff --git a/tests/test_sql_refsols/defog_restaurants_gen13_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen13_snowflake.sql index 615701afe..57b28518e 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen13_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen13_snowflake.sql @@ -1,11 +1,5 @@ SELECT - 
COALESCE(COUNT_IF(rating > 4.0), 0) / CASE - WHEN ( - COUNT_IF(rating < 4.0) <> 0 AND NOT COUNT_IF(rating < 4.0) IS NULL - ) - THEN COALESCE(COUNT_IF(rating < 4.0), 0) - ELSE NULL - END AS ratio + COUNT_IF(rating > 4.0) / NULLIF(COUNT_IF(rating < 4.0), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'new york' diff --git a/tests/test_sql_refsols/defog_restaurants_gen13_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen13_sqlite.sql index 65e74ad5c..246b16f4d 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen13_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen13_sqlite.sql @@ -1,11 +1,5 @@ SELECT - CAST(COALESCE(SUM(rating > 4.0), 0) AS REAL) / CASE - WHEN ( - NOT SUM(rating < 4.0) IS NULL AND SUM(rating < 4.0) <> 0 - ) - THEN COALESCE(SUM(rating < 4.0), 0) - ELSE NULL - END AS ratio + CAST(COALESCE(SUM(rating > 4.0), 0) AS REAL) / NULLIF(SUM(rating < 4.0), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'new york' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql index 6d482b5af..d9975b8d2 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql @@ -1,12 +1,5 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) / CASE - WHEN ( - NOT SUM(LOWER(food_type) <> 'vegan') IS NULL - AND SUM(LOWER(food_type) <> 'vegan') <> 0 - ) - THEN COALESCE(SUM(LOWER(food_type) <> 'vegan'), 0) - ELSE NULL - END AS ratio + COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql index 6d482b5af..d9975b8d2 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql @@ -1,12 +1,5 @@ SELECT - 
COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) / CASE - WHEN ( - NOT SUM(LOWER(food_type) <> 'vegan') IS NULL - AND SUM(LOWER(food_type) <> 'vegan') <> 0 - ) - THEN COALESCE(SUM(LOWER(food_type) <> 'vegan'), 0) - ELSE NULL - END AS ratio + COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql index a184ccfef..74d49c9a3 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql @@ -1,12 +1,5 @@ SELECT - CAST(COALESCE(SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / CASE - WHEN ( - NOT SUM(CASE WHEN LOWER(food_type) <> 'vegan' THEN 1 ELSE 0 END) IS NULL - AND SUM(CASE WHEN LOWER(food_type) <> 'vegan' THEN 1 ELSE 0 END) <> 0 - ) - THEN COALESCE(SUM(CASE WHEN LOWER(food_type) <> 'vegan' THEN 1 ELSE 0 END), 0) - ELSE NULL - END AS ratio + CAST(COALESCE(SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / NULLIF(SUM(CASE WHEN LOWER(food_type) <> 'vegan' THEN 1 ELSE 0 END), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql index 70767f83e..9f437c5c6 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql @@ -1,12 +1,5 @@ SELECT - COALESCE(COUNT_IF(LOWER(food_type) = 'vegan'), 0) / CASE - WHEN ( - COUNT_IF(LOWER(food_type) <> 'vegan') <> 0 - AND NOT COUNT_IF(LOWER(food_type) <> 'vegan') IS NULL - ) - THEN COALESCE(COUNT_IF(LOWER(food_type) <> 'vegan'), 0) - ELSE NULL - END AS ratio + COUNT_IF(LOWER(food_type) = 'vegan') / 
NULLIF(COUNT_IF(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql index 59b2ad815..06f824271 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql @@ -1,12 +1,5 @@ SELECT - CAST(COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) AS REAL) / CASE - WHEN ( - NOT SUM(LOWER(food_type) <> 'vegan') IS NULL - AND SUM(LOWER(food_type) <> 'vegan') <> 0 - ) - THEN COALESCE(SUM(LOWER(food_type) <> 'vegan'), 0) - ELSE NULL - END AS ratio + CAST(COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) AS REAL) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql index f1d9a4ac5..c15bdb8d1 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql @@ -1,5 +1,5 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'italian'), 0) / CASE WHEN COUNT(*) <> 0 THEN COUNT(*) ELSE NULL END AS ratio + COALESCE(SUM(LOWER(food_type) = 'italian'), 0) / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql index f1d9a4ac5..c15bdb8d1 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql @@ -1,5 +1,5 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'italian'), 0) / CASE WHEN COUNT(*) <> 0 THEN COUNT(*) ELSE NULL END AS ratio + COALESCE(SUM(LOWER(food_type) = 'italian'), 0) / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff 
--git a/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql index 68ac00f1e..5016a56a6 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql @@ -1,5 +1,5 @@ SELECT - CAST(COALESCE(SUM(CASE WHEN LOWER(food_type) = 'italian' THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / CASE WHEN COUNT(*) <> 0 THEN COUNT(*) ELSE NULL END AS ratio + CAST(COALESCE(SUM(CASE WHEN LOWER(food_type) = 'italian' THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen15_snowflake.sql index f31117c9a..d3d759103 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_snowflake.sql @@ -1,5 +1,5 @@ SELECT - COALESCE(COUNT_IF(LOWER(food_type) = 'italian'), 0) / CASE WHEN COUNT(*) <> 0 THEN COUNT(*) ELSE NULL END AS ratio + COUNT_IF(LOWER(food_type) = 'italian') / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql index 6cf3b4b72..c5dcfc7fb 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql @@ -1,5 +1,5 @@ SELECT - CAST(COALESCE(SUM(LOWER(food_type) = 'italian'), 0) AS REAL) / CASE WHEN COUNT(*) <> 0 THEN COUNT(*) ELSE NULL END AS ratio + CAST(COALESCE(SUM(LOWER(food_type) = 'italian'), 0) AS REAL) / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/tpch_q12_snowflake.sql b/tests/test_sql_refsols/tpch_q12_snowflake.sql index b02a4f3ff..cde94662f 100644 --- 
a/tests/test_sql_refsols/tpch_q12_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q12_snowflake.sql @@ -1,7 +1,7 @@ SELECT lineitem.l_shipmode AS L_SHIPMODE, - COALESCE(COUNT_IF(orders.o_orderpriority IN ('1-URGENT', '2-HIGH')), 0) AS HIGH_LINE_COUNT, - COALESCE(COUNT_IF(NOT orders.o_orderpriority IN ('1-URGENT', '2-HIGH')), 0) AS LOW_LINE_COUNT + COUNT_IF(orders.o_orderpriority IN ('1-URGENT', '2-HIGH')) AS HIGH_LINE_COUNT, + COUNT_IF(NOT orders.o_orderpriority IN ('1-URGENT', '2-HIGH')) AS LOW_LINE_COUNT FROM tpch.lineitem AS lineitem JOIN tpch.orders AS orders ON lineitem.l_orderkey = orders.o_orderkey From 586d7e0aa9f35a83490383e4f1895c8fc4514b75 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 26 Nov 2025 10:11:50 -0800 Subject: [PATCH 137/143] adding comments --- pydough/conversion/join_key_substitution.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pydough/conversion/join_key_substitution.py b/pydough/conversion/join_key_substitution.py index 74f2ded1d..d4393784a 100644 --- a/pydough/conversion/join_key_substitution.py +++ b/pydough/conversion/join_key_substitution.py @@ -1,7 +1,12 @@ """ Logic for switching references to join keys from one side of a join to the other when certain conditions are met, thus allowing the join to be removed by the -column pruner. +column pruner. The conditions are: +- The join is an inner join. +- The join has equi-join keys. +- The cardinality in either direction is singular-access. +- The only columns used from one side of the join (the one being referenced in + a singular-access manner) are the join keys (or a subset thereof). 
""" from pydough.relational import ( From 25f5904430f9bfe7600089a8cb1bb7c9f0847724 Mon Sep 17 00:00:00 2001 From: knassre-bodo <105652923+knassre-bodo@users.noreply.github.com> Date: Wed, 26 Nov 2025 13:15:44 -0500 Subject: [PATCH 138/143] Update pydough/conversion/join_key_substitution.py Co-authored-by: Hadia Ahmed --- pydough/conversion/join_key_substitution.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pydough/conversion/join_key_substitution.py b/pydough/conversion/join_key_substitution.py index d4393784a..6baaaac79 100644 --- a/pydough/conversion/join_key_substitution.py +++ b/pydough/conversion/join_key_substitution.py @@ -52,9 +52,11 @@ def visit_join(self, join: Join) -> RelationalNode: if ref.input_name == join.default_input_aliases[0] } rhs_refs = col_refs - lhs_refs - # If the left side is singular access, and all the columns used - # from the right side are just the join keys, then we can - # substitute the right join keys with the left join keys. + # If each row on the left side (LHS) matches exactly one row on the right side (RHS) + # (i.e., singular access) + # and the query only references columns from the RHS that are join keys, + # then we can substitute the RHS join keys with the corresponding LHS join keys. + # This allows the join to potentially be removed later since it adds no new data. 
if ( join.cardinality == JoinCardinality.SINGULAR_ACCESS and rhs_refs <= rhs_keys From b86c5dce9150bff2441c833025aa1965947a91cc Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 26 Nov 2025 10:15:57 -0800 Subject: [PATCH 139/143] [RUN CI] From 1bca6af8cfa324127131b26627e545c5185e5de6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Nov 2025 18:16:04 +0000 Subject: [PATCH 140/143] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pydough/conversion/join_key_substitution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydough/conversion/join_key_substitution.py b/pydough/conversion/join_key_substitution.py index 6baaaac79..0c48dbc5d 100644 --- a/pydough/conversion/join_key_substitution.py +++ b/pydough/conversion/join_key_substitution.py @@ -54,7 +54,7 @@ def visit_join(self, join: Join) -> RelationalNode: rhs_refs = col_refs - lhs_refs # If each row on the left side (LHS) matches exactly one row on the right side (RHS) # (i.e., singular access) - # and the query only references columns from the RHS that are join keys, + # and the query only references columns from the RHS that are join keys, # then we can substitute the RHS join keys with the corresponding LHS join keys. # This allows the join to potentially be removed later since it adds no new data. 
if ( From 07e6ab81b61308a8a4e14c244e4c2d890b3db7c9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 26 Nov 2025 10:28:29 -0800 Subject: [PATCH 141/143] [RUN CI] From 766604dbf9cce29301245eeffa34bc1b7d311891 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 26 Nov 2025 10:33:42 -0800 Subject: [PATCH 142/143] Adding many-net-filter tests --- tests/test_pipeline_tpch_custom.py | 214 ++++++++++++++++++ tests/test_plan_refsols/many_net_filter_1.txt | 6 + .../test_plan_refsols/many_net_filter_10.txt | 10 + .../test_plan_refsols/many_net_filter_11.txt | 13 ++ tests/test_plan_refsols/many_net_filter_2.txt | 6 + tests/test_plan_refsols/many_net_filter_3.txt | 6 + tests/test_plan_refsols/many_net_filter_4.txt | 10 + tests/test_plan_refsols/many_net_filter_5.txt | 10 + tests/test_plan_refsols/many_net_filter_6.txt | 10 + tests/test_plan_refsols/many_net_filter_7.txt | 10 + tests/test_plan_refsols/many_net_filter_8.txt | 10 + tests/test_plan_refsols/many_net_filter_9.txt | 10 + 12 files changed, 315 insertions(+) create mode 100644 tests/test_plan_refsols/many_net_filter_1.txt create mode 100644 tests/test_plan_refsols/many_net_filter_10.txt create mode 100644 tests/test_plan_refsols/many_net_filter_11.txt create mode 100644 tests/test_plan_refsols/many_net_filter_2.txt create mode 100644 tests/test_plan_refsols/many_net_filter_3.txt create mode 100644 tests/test_plan_refsols/many_net_filter_4.txt create mode 100644 tests/test_plan_refsols/many_net_filter_5.txt create mode 100644 tests/test_plan_refsols/many_net_filter_6.txt create mode 100644 tests/test_plan_refsols/many_net_filter_7.txt create mode 100644 tests/test_plan_refsols/many_net_filter_8.txt create mode 100644 tests/test_plan_refsols/many_net_filter_9.txt diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index fd035c64b..ddb9a7433 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -1508,6 +1508,220 @@ ), id="top_lineitems_info_2", 
), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key).WHERE(nation_key == 1)" + " .nation" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [18], + } + ), + "many_net_filter_1", + ), + id="many_net_filter_1", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation.WHERE(key == 2)" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [10], + } + ), + "many_net_filter_2", + ), + id="many_net_filter_2", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation" + " .customers.WHERE(nation_key == 3)" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [14], + } + ), + "many_net_filter_3", + ), + id="many_net_filter_3", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key).WHERE(nation_key == 4)" + " .nation" + " .region" + " .nations" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [88], + } + ), + "many_net_filter_4", + ), + id="many_net_filter_4", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation.WHERE(key == 5)" + " .region" + " .nations" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [81], + } + ), + "many_net_filter_5", + ), + id="many_net_filter_5", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation" + " .region" + " .nations.WHERE(key == 6)" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [77], + } + ), + "many_net_filter_6", + ), + id="many_net_filter_6", + ), + pytest.param( + PyDoughPandasTest( + "result = 
TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation" + " .region" + " .nations" + " .customers.WHERE(nation_key == 7)" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [81], + } + ), + "many_net_filter_7", + ), + id="many_net_filter_7", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation.WHERE(region_key == 0)" + " .region" + " .nations" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [403], + } + ), + "many_net_filter_8", + ), + id="many_net_filter_8", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation" + " .region.WHERE(key == 1)" + " .nations" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [399], + } + ), + "many_net_filter_9", + ), + id="many_net_filter_9", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation" + " .region" + " .nations.WHERE(region_key == 2)" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [401], + } + ), + "many_net_filter_10", + ), + id="many_net_filter_10", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key).WHERE(~ISIN(nation_key, list(range(0, 25, 3))))" + " .nation.WHERE(region_key < 3)" + " .region" + " .nations.WHERE(region_key > 0)" + " .customers.WHERE(~ISIN(nation_key, list(range(1, 25, 3))))" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [269], + } + ), + "many_net_filter_11", + ), + id="many_net_filter_11", + ), pytest.param( PyDoughPandasTest( window_filter_order_1, diff --git a/tests/test_plan_refsols/many_net_filter_1.txt b/tests/test_plan_refsols/many_net_filter_1.txt new file mode 100644 index 000000000..c3287b05e --- /dev/null +++ 
b/tests/test_plan_refsols/many_net_filter_1.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + FILTER(condition=s_nationkey == 1:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_10.txt b/tests/test_plan_refsols/many_net_filter_10.txt new file mode 100644 index 000000000..8eb35e3e3 --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_10.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=n_regionkey == 2:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) 
diff --git a/tests/test_plan_refsols/many_net_filter_11.txt b/tests/test_plan_refsols/many_net_filter_11.txt new file mode 100644 index 000000000..6122ff62a --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_11.txt @@ -0,0 +1,13 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + FILTER(condition=NOT(ISIN(s_nationkey, [0, 3, 6, 9, 12, 15, 18, 21, 24]:array[unknown])), columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_regionkey < 3:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=n_regionkey > 0:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=NOT(ISIN(c_nationkey, [1, 4, 7, 10, 13, 16, 19, 22]:array[unknown])), columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_2.txt b/tests/test_plan_refsols/many_net_filter_2.txt new file mode 100644 index 000000000..b17aa1d69 --- /dev/null +++ 
b/tests/test_plan_refsols/many_net_filter_2.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + FILTER(condition=s_nationkey == 2:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_3.txt b/tests/test_plan_refsols/many_net_filter_3.txt new file mode 100644 index 000000000..0a679d928 --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_3.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=c_nationkey == 3:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_4.txt b/tests/test_plan_refsols/many_net_filter_4.txt new file mode 100644 index 000000000..c05b4d33a --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_4.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, 
cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + FILTER(condition=s_nationkey == 4:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_5.txt b/tests/test_plan_refsols/many_net_filter_5.txt new file mode 100644 index 000000000..5c3449d2a --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_5.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_nationkey == 5:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + 
SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_6.txt b/tests/test_plan_refsols/many_net_filter_6.txt new file mode 100644 index 000000000..47ce0b021 --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_6.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=n_nationkey == 6:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_7.txt b/tests/test_plan_refsols/many_net_filter_7.txt new file mode 100644 index 000000000..fa1027a5b --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_7.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=c_nationkey == 7:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_8.txt b/tests/test_plan_refsols/many_net_filter_8.txt new file mode 100644 index 000000000..583e93edb --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_8.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_regionkey == 0:numeric, columns={'n_nationkey': 
n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_9.txt b/tests/test_plan_refsols/many_net_filter_9.txt new file mode 100644 index 000000000..2f693c013 --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_9.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_regionkey == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) From a538bb29c72574171c8fb18e4e41406c00b32adb Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 26 Nov 2025 10:34:06 -0800 Subject: [PATCH 143/143] [RUN CI]