diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index 266587e91..aeea1bf38 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -858,7 +858,7 @@ def is_singular(self) -> bool: case _: return False # The current level is fine, so check any levels above it next. - return True if self.parent is None else self.parent.always_exists() + return True if self.parent is None else self.parent.is_singular() def equals_ignoring_successors(self, other: "HybridTree") -> bool: """ diff --git a/pydough/conversion/join_key_substitution.py b/pydough/conversion/join_key_substitution.py new file mode 100644 index 000000000..0c48dbc5d --- /dev/null +++ b/pydough/conversion/join_key_substitution.py @@ -0,0 +1,108 @@ +""" +Logic for switching references to join keys from one side of a join to the other +when certain conditions are met, thus allowing the join to be removed by the +column pruner. The conditions are: +- The join is an inner join. +- The join has equi-join keys. +- The cardinality in either direction is singular-access. +- The only columns used from one side of the join (the one being referenced in + a singular-access manner) are the join keys (or a subset thereof). +""" + +from pydough.relational import ( + ColumnReference, + ColumnReferenceFinder, + Join, + JoinCardinality, + JoinType, + RelationalExpression, + RelationalNode, + RelationalShuttle, +) +from pydough.relational.rel_util import ( + apply_substitution, + extract_equijoin_keys, +) + + +class JoinKeySubstitutionShuttle(RelationalShuttle): + """ + The relational shuttle that performs join key substitution optimization. + """ + + def visit_join(self, join: Join) -> RelationalNode: + # Build up a mapping of join key substitutions mapping input columns + # from one input to another when the optimization case is detected: + # requires an inner join with equi-join keys. + join_substitution: dict[RelationalExpression, RelationalExpression] = {} + if join.join_type == JoinType.INNER: + lhs_keys_list, rhs_keys_list = extract_equijoin_keys(join) + if len(lhs_keys_list) > 0 and len(rhs_keys_list) > 0: + # Identify which columns are used by the join columns that come + # from the left and right inputs. + lhs_keys: set[ColumnReference] = set(lhs_keys_list) + rhs_keys: set[ColumnReference] = set(rhs_keys_list) + col_finder = ColumnReferenceFinder() + for value in join.columns.values(): + value.accept(col_finder) + col_refs: set[ColumnReference] = col_finder.get_column_references() + lhs_refs = { + ref + for ref in col_refs + if ref.input_name == join.default_input_aliases[0] + } + rhs_refs = col_refs - lhs_refs + # If each row on the left side (LHS) matches exactly one row on the right side (RHS) + # (i.e., singular access) + # and the query only references columns from the RHS that are join keys, + # then we can substitute the RHS join keys with the corresponding LHS join keys. + # This allows the join to potentially be removed later since it adds no new data. + if ( + join.cardinality == JoinCardinality.SINGULAR_ACCESS + and rhs_refs <= rhs_keys + ): + for lhs_key, rhs_key in zip(lhs_keys_list, rhs_keys_list): + join_substitution[rhs_key] = lhs_key + + # If the right side is singular access, and all the columns used + # from the left side are just the join keys, then we can + # substitute the left join keys with the right join keys. + elif ( + join.reverse_cardinality == JoinCardinality.SINGULAR_ACCESS + and lhs_refs <= lhs_keys + ): + for lhs_key, rhs_key in zip(lhs_keys_list, rhs_keys_list): + join_substitution[lhs_key] = rhs_key + + # If any substitutions were identified, create a new Join node + # with the substitutions applied to its columns. + if len(join_substitution) > 0: + join = Join( + join.inputs, + join.condition, + join.join_type, + { + name: apply_substitution(expr, join_substitution, {}) + for name, expr in join.columns.items() + }, + join.cardinality, + join.reverse_cardinality, + join.correl_name, + ) + + # Recursively visit the inputs to the join to transform them as well. + return super().visit_join(join) + + +def join_key_substitution(root: RelationalNode) -> RelationalNode: + """ + The main entry point for join key substitution optimization. + + Args: + `root`: The root of the relational tree being optimized. + + Returns: + The optimized relational tree. + """ + shuttle: JoinKeySubstitutionShuttle = JoinKeySubstitutionShuttle() + return root.accept_shuttle(shuttle) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 9ae462eaa..35c98e75e 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -88,6 +88,7 @@ from .hybrid_translator import HybridTranslator from .hybrid_tree import HybridTree from .join_aggregate_transpose import pull_aggregates_above_joins +from .join_key_substitution import join_key_substitution from .masking_shuttles import MaskLiteralComparisonShuttle from .merge_projects import merge_projects from .projection_pullup import pullup_projections @@ -1584,6 +1585,12 @@ def optimize_relational_tree( pruner: ColumnPruner = ColumnPruner() root = pruner.prune_unused_columns(root) + # Run a pass that substitutes join keys when the only columns used by one + # side of the join are the join keys. This will make some joins redundant + # and allow them to be deleted later. Then, re-run column pruning. + root = confirm_root(join_key_substitution(root)) + root = pruner.prune_unused_columns(root) + # Bubble up names from the leaf nodes to further encourage simpler naming # without aliases, and also to delete duplicate columns where possible. # This is done early to maximize the chances that a nicer name will be used @@ -1631,7 +1638,8 @@ def optimize_relational_tree( # D: join-aggregate transpose # E: projection pullup again # F: redundant aggregation removal - # G: column pruning + # G: join key substitution + # H: column pruning # This is done because pullup will create more opportunities for expression # simplification, which will allow more filters to be pushed further down, # and the combination of those together will create more opportunities for @@ -1644,6 +1652,7 @@ def optimize_relational_tree( root = confirm_root(pull_aggregates_above_joins(root)) root = confirm_root(pullup_projections(root)) root = remove_redundant_aggs(root) + root = confirm_root(join_key_substitution(root)) root = pruner.prune_unused_columns(root) # Re-run projection merging, without pushing into joins. This will allow diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index de58406be..e118c3bdd 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -1283,8 +1283,14 @@ def simplify_function_call( ) case _: - # All other cases remain non-simplified. - pass + # Simplify comparing an expression to itself as + # True/False. All other cases remain non-simplified. + if expr.inputs[0] == expr.inputs[1]: + is_eq: bool = expr.op in (pydop.EQU, pydop.LEQ, pydop.GEQ) + output_expr = LiteralExpression(is_eq, expr.data_type) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=is_eq + ) output_predicates.not_negative = True diff --git a/tests/test_metadata/sample_graphs.json b/tests/test_metadata/sample_graphs.json index 202e38d52..3b0757973 100644 --- a/tests/test_metadata/sample_graphs.json +++ b/tests/test_metadata/sample_graphs.json @@ -673,7 +673,7 @@ "parent collection": "parts", "child collection": "supply_records", "singular": false, - "always matches": false, + "always matches": true, "keys": {"key": ["part_key"]}, "description": "The records indicating which companies supply the part", "synonyms": ["producers", "vendors", "suppliers of part"] diff --git a/tests/test_metadata/snowflake_sample_graphs.json b/tests/test_metadata/snowflake_sample_graphs.json index d3b7c665e..dc510d1e7 100644 --- a/tests/test_metadata/snowflake_sample_graphs.json +++ b/tests/test_metadata/snowflake_sample_graphs.json @@ -673,7 +673,7 @@ "parent collection": "parts", "child collection": "supply_records", "singular": false, - "always matches": false, + "always matches": true, "keys": {"key": ["part_key"]}, "description": "The records indicating which companies supply the part", "synonyms": ["producers", "vendors", "suppliers of part"] diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 43dea2ff0..ddb9a7433 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -1449,6 +1449,279 @@ ), id="supplier_pct_national_qty", ), + pytest.param( + PyDoughPandasTest( + "result = (" + " regions" + " .nations" + " .customers" + " .BEST(by=account_balance.DESC(), per='regions')" + " .CALCULATE(key)" + ")", + "TPCH", + lambda: pd.DataFrame({"key": [2487, 61453, 76011, 81976, 144232]}), + "richest_customer_key_per_region", + ), + id="richest_customer_key_per_region", + ), + pytest.param( + PyDoughPandasTest( + "result = (" + " lines" + " .TOP_K(7, by=(order_key.ASC(), line_number.ASC()))" + " .CALCULATE(order_key, line_number, part_size=part_and_supplier.part.size, supplier_nation=part_and_supplier.supplier.nation.key)" + ")", + "TPCH", + lambda: pd.DataFrame( + { + "order_key": [1, 1, 1, 1, 1, 1, 2], + "line_number": [1, 2, 3, 4, 5, 6, 1], + "part_size": [9, 47, 16, 20, 44, 46, 19], + "supplier_nation": [23, 13, 5, 24, 20, 8, 0], + } + ), + "top_lineitems_info_1", + ), + id="top_lineitems_info_1", + ), + pytest.param( + PyDoughPandasTest( + "result = (" + " parts" + " .CALCULATE(part_size=size, selected_part_key=key)" + " .supply_records.CALCULATE(selected_supplier_key=supplier_key)" + " .CROSS(nations.CALCULATE(supplier_nation=key).suppliers.supply_records.lines)" + " .WHERE((part_key == selected_part_key) & (supplier_key == selected_supplier_key))" + " .TOP_K(7, by=(order_key.ASC(), line_number.ASC()))" + " .CALCULATE(order_key, line_number, part_size, supplier_nation)" + ")", + "TPCH", + lambda: pd.DataFrame( + { + "order_key": [1, 1, 1, 1, 1, 1, 2], + "line_number": [1, 2, 3, 4, 5, 6, 1], + "part_size": [9, 47, 16, 20, 44, 46, 19], + "supplier_nation": [23, 13, 5, 24, 20, 8, 0], + } + ), + "top_lineitems_info_2", + ), + id="top_lineitems_info_2", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key).WHERE(nation_key == 1)" + " .nation" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [18], + } + ), + "many_net_filter_1", + ), + id="many_net_filter_1", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation.WHERE(key == 2)" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [10], + } + ), + "many_net_filter_2", + ), + id="many_net_filter_2", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation" + " .customers.WHERE(nation_key == 3)" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [14], + } + ), + "many_net_filter_3", + ), + id="many_net_filter_3", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key).WHERE(nation_key == 4)" + " .nation" + " .region" + " .nations" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [88], + } + ), + "many_net_filter_4", + ), + id="many_net_filter_4", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation.WHERE(key == 5)" + " .region" + " .nations" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [81], + } + ), + "many_net_filter_5", + ), + id="many_net_filter_5", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation" + " .region" + " .nations.WHERE(key == 6)" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [77], + } + ), + "many_net_filter_6", + ), + id="many_net_filter_6", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation" + " .region" + " .nations" + " .customers.WHERE(nation_key == 7)" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [81], + } + ), + "many_net_filter_7", + ), + id="many_net_filter_7", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation.WHERE(region_key == 0)" + " .region" + " .nations" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [403], + } + ), + "many_net_filter_8", + ), + id="many_net_filter_8", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation" + " .region.WHERE(key == 1)" + " .nations" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [399], + } + ), + "many_net_filter_9", + ), + id="many_net_filter_9", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key)" + " .nation" + " .region" + " .nations.WHERE(region_key == 2)" + " .customers" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [401], + } + ), + "many_net_filter_10", + ), + id="many_net_filter_10", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(" + " suppliers.CALCULATE(sk = key).WHERE(~ISIN(nation_key, list(range(0, 25, 3))))" + " .nation.WHERE(region_key < 3)" + " .region" + " .nations.WHERE(region_key > 0)" + " .customers.WHERE(~ISIN(nation_key, list(range(1, 25, 3))))" + " .WHERE(key == sk)" + "))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [269], + } + ), + "many_net_filter_11", + ), + id="many_net_filter_11", + ), pytest.param( PyDoughPandasTest( window_filter_order_1, diff --git a/tests/test_plan_refsols/aggregate_anti.txt b/tests/test_plan_refsols/aggregate_anti.txt index c61c67b94..8d4b6d5bc 100644 --- a/tests/test_plan_refsols/aggregate_anti.txt +++ b/tests/test_plan_refsols/aggregate_anti.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt index 0b267e0e1..c48246b61 100644 --- a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt +++ b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt @@ -1,7 +1,5 @@ -ROOT(columns=[('order_key', o_orderkey), ('max_ratio', max_ratio)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) +ROOT(columns=[('order_key', l_orderkey), ('max_ratio', max_ratio)], orderings=[]) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_on_function_call.txt b/tests/test_plan_refsols/aggregate_on_function_call.txt index 54eca15c5..5313266ab 100644 --- a/tests/test_plan_refsols/aggregate_on_function_call.txt +++ b/tests/test_plan_refsols/aggregate_on_function_call.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('nation_name', n_nationkey), ('avg_consumer_value', max_expr)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_expr': t1.max_expr, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_expr': MAX(IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal))}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) +ROOT(columns=[('nation_name', c_nationkey), ('avg_consumer_value', avg_consumer_value)], orderings=[]) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_consumer_value': MAX(IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal))}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 18f353c33..6c99fbc08 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) +ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_expr), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_expr': t1.sum_expr, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index 7c8d13cc8..d041361c5 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,7 +1,5 @@ ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) + JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index b719a64e1..2ef53d9c2 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index f29c91858..339aaf5d5 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/anti_aggregate.txt b/tests/test_plan_refsols/anti_aggregate.txt index c61c67b94..8d4b6d5bc 100644 --- a/tests/test_plan_refsols/anti_aggregate.txt +++ b/tests/test_plan_refsols/anti_aggregate.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/anti_aggregate_alternate.txt b/tests/test_plan_refsols/anti_aggregate_alternate.txt index c12bdd20e..a273602c6 100644 --- a/tests/test_plan_refsols/anti_aggregate_alternate.txt +++ b/tests/test_plan_refsols/anti_aggregate_alternate.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', 0:numeric), ('sum_price_of_10parts', None:unknown)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 022d52ee5..2312171f5 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -7,7 +7,7 @@ ROOT(columns=[('supplier_name', anything_s_name), ('part_name', anything_p_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_ap.txt b/tests/test_plan_refsols/common_prefix_ap.txt index 18f665258..43b28f45b 100644 --- a/tests/test_plan_refsols/common_prefix_ap.txt +++ b/tests/test_plan_refsols/common_prefix_ap.txt @@ -1,10 +1,10 @@ ROOT(columns=[('part_name', p_name), ('supplier_name', s_name), ('supplier_quantity', ps_availqty), ('supplier_nation', n_name)], orderings=[(p_name):asc_first]) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=p_brand == 'Brand#32':string & p_size == 10:numeric & CONTAINS(p_name, 'pink':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey, 'p_size': p_size}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index 32e6086dd..26ca70545 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ b/tests/test_plan_refsols/common_prefix_aq.txt @@ -7,7 +7,7 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier' JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 01e756d5e..7694c7141 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -12,7 +12,7 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppl SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 7d1b191be..ba4252b2b 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -10,7 +10,7 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 9fe75904a..125f405ef 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,12 +1,12 @@ ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.avg_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_expr': t0.sum_expr, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 2f8b7cb03..4b184d5bf 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,15 +1,15 @@ ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'supplier_avg_price': t0.supplier_avg_price}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'supplier_avg_price': AVG(p_retailprice)}) + JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_expr': t0.sum_expr, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/correl_19.txt b/tests/test_plan_refsols/correl_19.txt index c838204a3..ede8c4337 100644 --- a/tests/test_plan_refsols/correl_19.txt +++ b/tests/test_plan_refsols/correl_19.txt @@ -1,7 +1,5 @@ ROOT(columns=[('supplier_name', anything_s_name), ('n_super_cust', n_rows)], orderings=[(n_rows):desc_last], limit=5:numeric) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 72f58693b..00a187e5f 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,6 +1,6 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_name', anything_anything_anything_n_name), ('n_above_avg_customers', anything_anything_n_rows), ('n_above_avg_suppliers', agg_3_14), ('min_cust_acctbal', anything_min_c_acctbal), ('max_cust_acctbal', anything_max_c_acctbal)], orderings=[(anything_anything_anything_n_regionkey):asc_first, (anything_anything_anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'anything_anything_n_regionkey': t0.anything_anything_n_regionkey, 'anything_n_rows': t0.anything_n_rows, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_nationkey': t1.n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'agg_3_14': COUNT(), 'anything_anything_anything_n_name': ANYTHING(anything_anything_n_name), 'anything_anything_anything_n_regionkey': ANYTHING(anything_anything_n_regionkey), 'anything_anything_n_rows': ANYTHING(anything_n_rows), 'anything_max_c_acctbal': ANYTHING(max_c_acctbal), 'anything_min_c_acctbal': ANYTHING(min_c_acctbal)}) + JOIN(condition=t0.c_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'anything_anything_n_regionkey': t0.anything_anything_n_regionkey, 'anything_n_rows': t0.anything_n_rows, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 's_nationkey': t1.s_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'anything_n_rows': ANYTHING(n_rows), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'c_acctbal': t1.c_acctbal, 'c_nationkey': t1.c_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) @@ -12,9 +12,7 @@ ROOT(columns=[('region_key', anything_anything_anything_n_regionkey), ('nation_n SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_nationkey': t0.s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index bbdb92976..0813f6ed2 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -1,18 +1,15 @@ -ROOT(columns=[('year', year_o_orderdate), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', sum_sum_sum_n_rows), ('total_value', DEFAULT_TO(sum_sum_sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year_o_orderdate': year_o_orderdate}, aggregations={'sum_sum_sum_n_rows': SUM(sum_sum_n_rows), 'sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year_o_orderdate': t0.year_o_orderdate}) - AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year_o_orderdate': year_o_orderdate}, aggregations={'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year_o_orderdate': t0.year_o_orderdate}) - AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year_o_orderdate': year_o_orderdate}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_rows': t0.n_rows, 'ps_suppkey': t1.ps_suppkey, 'sum_l_extendedprice': t0.sum_l_extendedprice, 'year_o_orderdate': t0.year_o_orderdate}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT(), 'sum_l_extendedprice': SUM(l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) +ROOT(columns=[('year', year_o_orderdate), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', sum_sum_n_rows), ('total_value', DEFAULT_TO(sum_sum_sum_l_extendedprice, 0:numeric))], orderings=[]) + AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year_o_orderdate': year_o_orderdate}, aggregations={'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year_o_orderdate': t0.year_o_orderdate}) + AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year_o_orderdate': year_o_orderdate}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_rows': t0.n_rows, 's_nationkey': t1.s_nationkey, 'sum_l_extendedprice': t0.sum_l_extendedprice, 'year_o_orderdate': t0.year_o_orderdate}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT(), 'sum_l_extendedprice': SUM(l_extendedprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt index 7f2716e17..41716ca66 100644 --- a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt +++ b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt @@ -1,8 +1,6 @@ -ROOT(columns=[('nation_name', n_nationkey), ('num_customers', n_rows), ('num_suppliers', agg_3), ('customer_to_supplier_wealth_ratio', DEFAULT_TO(sum_c_acctbal, 0:numeric) / DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_3': t1.n_rows, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) +ROOT(columns=[('nation_name', c_nationkey), ('num_customers', n_rows), ('num_suppliers', agg_3), ('customer_to_supplier_wealth_ratio', DEFAULT_TO(sum_c_acctbal, 0:numeric) / DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) + JOIN(condition=t0.c_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_3': t1.n_rows, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/count_single_subcollection.txt b/tests/test_plan_refsols/count_single_subcollection.txt index 9f3091e89..2618ad06c 100644 --- a/tests/test_plan_refsols/count_single_subcollection.txt +++ b/tests/test_plan_refsols/count_single_subcollection.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('nation_name', n_nationkey), ('num_customers', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) +ROOT(columns=[('nation_name', c_nationkey), ('num_customers', n_rows)], orderings=[]) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/cryptbank_agg_04_raw.txt b/tests/test_plan_refsols/cryptbank_agg_04_raw.txt index 6e2aa056c..673c8b1d2 100644 --- a/tests/test_plan_refsols/cryptbank_agg_04_raw.txt +++ b/tests/test_plan_refsols/cryptbank_agg_04_raw.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('branch_key', b_key), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_unmask_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_unmask_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[]) - JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'b_key': t0.b_key, 'sum_unmask_a_balance': t1.sum_unmask_a_balance}) - SCAN(table=CRBNK.BRANCHES, columns={'b_key': b_key}) - AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_unmask_a_balance': SUM(UNMASK::(SQRT([a_balance])))}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey}) +ROOT(columns=[('branch_key', a_branchkey), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_unmask_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_unmask_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[]) + AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_unmask_a_balance': SUM(UNMASK::(SQRT([a_balance])))}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey}) diff --git a/tests/test_plan_refsols/cryptbank_agg_04_rewrite.txt b/tests/test_plan_refsols/cryptbank_agg_04_rewrite.txt index 6e2aa056c..673c8b1d2 100644 --- a/tests/test_plan_refsols/cryptbank_agg_04_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_agg_04_rewrite.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('branch_key', b_key), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_unmask_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_unmask_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[]) - JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'b_key': t0.b_key, 'sum_unmask_a_balance': t1.sum_unmask_a_balance}) - SCAN(table=CRBNK.BRANCHES, columns={'b_key': b_key}) - AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_unmask_a_balance': SUM(UNMASK::(SQRT([a_balance])))}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey}) +ROOT(columns=[('branch_key', a_branchkey), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_unmask_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_unmask_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[]) + AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_unmask_a_balance': SUM(UNMASK::(SQRT([a_balance])))}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index a0c1f8f96..a9a88b56e 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,26 +1,22 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', key_5), ('c_bal', c_acctbal), ('cr_bal', account_balance_13), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', c_custkey)], orderings=[(n_name):asc_first], limit=10:numeric) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=n_nationkey == s_nationkey & RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_regionkey': t0.r_regionkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=n_nationkey == s_nationkey & RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 's_suppkey': s_suppkey}) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt index 077f041bc..f465f4290 100644 --- a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt +++ b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('n_events', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, columns={}) +ROOT(columns=[('n_events', ndistinct_ev_key)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_ev_key': NDISTINCT(ev_key)}) + JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, columns={'ev_key': t0.ev_key}) JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=t_name == 'Pre-Dawn':string, columns={'t_end_hour': t_end_hour, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt index 09e213e3f..d05efdf4f 100644 --- a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt +++ b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt @@ -1,9 +1,7 @@ -ROOT(columns=[('engine', search_engine), ('n_users', DEFAULT_TO(ndistinct_user_id, 0:numeric))], orderings=[(search_engine):asc_first]) - JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ndistinct_user_id': t1.ndistinct_user_id, 'search_engine': t0.search_engine}) +ROOT(columns=[('engine', search_engine), ('n_users', DEFAULT_TO(ndistinct_search_user_id, 0:numeric))], orderings=[(search_engine):asc_first]) + JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ndistinct_search_user_id': t1.ndistinct_search_user_id, 'search_engine': t0.search_engine}) AGGREGATE(keys={'search_engine': search_engine}, aggregations={}) SCAN(table=SEARCHES, columns={'search_engine': search_engine}) - AGGREGATE(keys={'search_engine': search_engine}, aggregations={'ndistinct_user_id': NDISTINCT(user_id)}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'search_engine': t0.search_engine, 'user_id': t1.user_id}) - FILTER(condition=MONOTONIC(2010:numeric, YEAR(search_ts), 2019:numeric), columns={'search_engine': search_engine, 'search_user_id': search_user_id}) - SCAN(table=SEARCHES, columns={'search_engine': search_engine, 'search_ts': search_ts, 'search_user_id': search_user_id}) - SCAN(table=USERS, columns={'user_id': user_id}) + AGGREGATE(keys={'search_engine': search_engine}, aggregations={'ndistinct_search_user_id': NDISTINCT(search_user_id)}) + FILTER(condition=MONOTONIC(2010:numeric, YEAR(search_ts), 2019:numeric), columns={'search_engine': search_engine, 'search_user_id': search_user_id}) + SCAN(table=SEARCHES, columns={'search_engine': search_engine, 'search_ts': search_ts, 'search_user_id': search_user_id}) diff --git a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt index 1987fec13..1d9130f74 100644 --- a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt +++ b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt @@ -8,7 +8,7 @@ ROOT(columns=[('order_key', l_orderkey), ('ship_date', l_shipdate), ('extended_p SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=STARTSWITH(p_type, 'ECONOMY':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/many_net_filter_1.txt b/tests/test_plan_refsols/many_net_filter_1.txt new file mode 100644 index 000000000..c3287b05e --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_1.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + FILTER(condition=s_nationkey == 1:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_10.txt b/tests/test_plan_refsols/many_net_filter_10.txt new file mode 100644 index 000000000..8eb35e3e3 --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_10.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=n_regionkey == 2:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_11.txt b/tests/test_plan_refsols/many_net_filter_11.txt new file mode 100644 index 000000000..6122ff62a --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_11.txt @@ -0,0 +1,13 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + FILTER(condition=NOT(ISIN(s_nationkey, [0, 3, 6, 9, 12, 15, 18, 21, 24]:array[unknown])), columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_regionkey < 3:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=n_regionkey > 0:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=NOT(ISIN(c_nationkey, [1, 4, 7, 10, 13, 16, 19, 22]:array[unknown])), columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_2.txt b/tests/test_plan_refsols/many_net_filter_2.txt new file mode 100644 index 000000000..b17aa1d69 --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_2.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + FILTER(condition=s_nationkey == 2:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_3.txt b/tests/test_plan_refsols/many_net_filter_3.txt new file mode 100644 index 000000000..0a679d928 --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_3.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=c_nationkey == 3:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_4.txt b/tests/test_plan_refsols/many_net_filter_4.txt new file mode 100644 index 000000000..c05b4d33a --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_4.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + FILTER(condition=s_nationkey == 4:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_5.txt b/tests/test_plan_refsols/many_net_filter_5.txt new file mode 100644 index 000000000..5c3449d2a --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_5.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_nationkey == 5:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_6.txt b/tests/test_plan_refsols/many_net_filter_6.txt new file mode 100644 index 000000000..47ce0b021 --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_6.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=n_nationkey == 6:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_7.txt b/tests/test_plan_refsols/many_net_filter_7.txt new file mode 100644 index 000000000..fa1027a5b --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_7.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=c_nationkey == 7:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_8.txt b/tests/test_plan_refsols/many_net_filter_8.txt new file mode 100644 index 000000000..583e93edb --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_8.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_regionkey == 0:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_9.txt b/tests/test_plan_refsols/many_net_filter_9.txt new file mode 100644 index 000000000..2f693c013 --- /dev/null +++ b/tests/test_plan_refsols/many_net_filter_9.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_regionkey == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 132528e7a..cec6ecabc 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,30 +1,19 @@ -ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', sum_sum_sbTxShares_1 / sum_count_sbTxShares_1), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares_1 / t0.sum_count_sbTxShares_1 & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t1.sum_sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) +ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', cust_tick_avg_shares), ('cust_avg_shares', sum_sbTxShares / count_sbTxShares)], orderings=[(sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sbTxShares / t0.count_sbTxShares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_sbTxShares': t1.sum_sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'avg_sbTxShares': AVG(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'avg_sbTxShares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multiple_has_hasnot.txt b/tests/test_plan_refsols/multiple_has_hasnot.txt index 78b9d31fd..cf25533fb 100644 --- a/tests/test_plan_refsols/multiple_has_hasnot.txt +++ b/tests/test_plan_refsols/multiple_has_hasnot.txt @@ -3,20 +3,20 @@ ROOT(columns=[('name', p_name)], orderings=[]) JOIN(condition=t0.p_partkey == t1.ps_partkey, type=ANTI, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'ARGENTINA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index ad87610a6..684efa27a 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,8 +1,6 @@ -ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) +ROOT(columns=[('nation_name', c_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) + JOIN(condition=t0.c_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal, 'c_nationkey': t0.c_nationkey, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt index 94e0925e0..d3daca766 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt @@ -1,8 +1,6 @@ -ROOT(columns=[('nation_name', n_nationkey), ('consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('producer_value', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) +ROOT(columns=[('nation_name', c_nationkey), ('consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('producer_value', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) + JOIN(condition=t0.c_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/orders_sum_line_price.txt b/tests/test_plan_refsols/orders_sum_line_price.txt index 0ffd28d94..1720fa8af 100644 --- a/tests/test_plan_refsols/orders_sum_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_line_price.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('okey', o_orderkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) +ROOT(columns=[('okey', l_orderkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt index 046adabcd..2ef503a8b 100644 --- a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('okey', o_orderkey), ('lavg', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) +ROOT(columns=[('okey', l_orderkey), ('lavg', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice)], orderings=[]) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt index 0cbd4c887..0e4ebd6a9 100644 --- a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt +++ b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True))], orderings=[(p_partkey):asc_first], limit=15:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) diff --git a/tests/test_plan_refsols/richest_customer_key_per_region.txt b/tests/test_plan_refsols/richest_customer_key_per_region.txt new file mode 100644 index 000000000..67636a941 --- /dev/null +++ b/tests/test_plan_refsols/richest_customer_key_per_region.txt @@ -0,0 +1,5 @@ +ROOT(columns=[('key', c_custkey)], orderings=[]) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt b/tests/test_plan_refsols/semi_aggregate.txt index 18f353c33..6c99fbc08 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) +ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_expr), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_expr': t1.sum_expr, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_anti_2.txt b/tests/test_plan_refsols/simple_anti_2.txt index b87256acc..081922387 100644 --- a/tests/test_plan_refsols/simple_anti_2.txt +++ b/tests/test_plan_refsols/simple_anti_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size < 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_semi_2.txt b/tests/test_plan_refsols/simple_semi_2.txt index d52362ab1..e3f041ae8 100644 --- a/tests/test_plan_refsols/simple_semi_2.txt +++ b/tests/test_plan_refsols/simple_semi_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=SEMI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size < 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 8251cf845..857976233 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -6,7 +6,7 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_ord FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey != 0:numeric), 0:numeric)):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'count_l_suppkey': count_l_suppkey, 'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'l_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'count_l_suppkey': COUNT(l_suppkey)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index a63790075..4b815e991 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -1,14 +1,12 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('n_shipments', n_rows)], orderings=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (s_name):asc_first], limit=3:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + FILTER(condition=RANKING(args=[], partition=[l_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'l_suppkey': l_suppkey, 'n_rows': n_rows, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/top_lineitems_info_1.txt b/tests/test_plan_refsols/top_lineitems_info_1.txt new file mode 100644 index 000000000..8ea7fb213 --- /dev/null +++ b/tests/test_plan_refsols/top_lineitems_info_1.txt @@ -0,0 +1,11 @@ +ROOT(columns=[('order_key', l_orderkey), ('line_number', l_linenumber), ('part_size', p_size), ('supplier_nation', s_nationkey)], orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first]) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_size': t0.p_size, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_size': t1.p_size}) + LIMIT(limit=7:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first]) + SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_size': t1.p_size, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/top_lineitems_info_2.txt b/tests/test_plan_refsols/top_lineitems_info_2.txt new file mode 100644 index 000000000..aff71c57c --- /dev/null +++ b/tests/test_plan_refsols/top_lineitems_info_2.txt @@ -0,0 +1,12 @@ +ROOT(columns=[('order_key', l_orderkey), ('line_number', l_linenumber), ('part_size', p_size), ('supplier_nation', n_nationkey)], orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first], limit=7:numeric) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.supplier_key_11 == t1.l_suppkey & t1.l_partkey == t0.p_partkey & t1.l_suppkey == t0.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'n_nationkey': t0.n_nationkey, 'p_size': t0.p_size}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'p_partkey': t0.p_partkey, 'p_size': t0.p_size, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'supplier_key_11': t1.ps_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'p_partkey': t0.p_partkey, 'p_size': t0.p_size, 'ps_suppkey': t0.ps_suppkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'p_partkey': t0.p_partkey, 'p_size': t0.p_size, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'p_partkey': t0.p_partkey, 'p_size': t0.p_size, 'ps_suppkey': t1.ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index e097c9898..5f3a777cd 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), ('P_PARTKEY', p_partkey), ('P_MFGR', p_mfgr), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('S_COMMENT', s_comment)], orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first], limit=10:numeric) FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_supplycost):asc_last], allow_ties=True) == 1:numeric, columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 0d8a6953e..e1b29a494 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -5,7 +5,7 @@ ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', DEFAULT_TO(KEEP_IF(count_ SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, columns={'anything_l_suppkey': t0.anything_l_suppkey}) + JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.l_orderkey, type=ANTI, columns={'anything_l_suppkey': t0.anything_l_suppkey}) FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_suppkey': anything_l_suppkey, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) @@ -14,10 +14,8 @@ ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', DEFAULT_TO(KEEP_IF(count_ SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey}) - FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) + JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.l_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) + FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) FILTER(condition=l_receiptdate > l_commitdate, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/wdi_low_income_country_with_series.txt b/tests/test_plan_refsols/wdi_low_income_country_with_series.txt index e97818b10..354a7fe14 100644 --- a/tests/test_plan_refsols/wdi_low_income_country_with_series.txt +++ b/tests/test_plan_refsols/wdi_low_income_country_with_series.txt @@ -2,7 +2,5 @@ ROOT(columns=[('country_code', CountryCode)], orderings=[]) JOIN(condition=t0.CountryCode == t1.Countrycode, type=SEMI, columns={'CountryCode': t0.CountryCode}) FILTER(condition=IncomeGroup == 'Low income':string, columns={'CountryCode': CountryCode}) SCAN(table=main.Country, columns={'CountryCode': CountryCode, 'IncomeGroup': IncomeGroup}) - JOIN(condition=t0.Seriescode == t1.SeriesCode, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'Countrycode': t0.Countrycode}) + FILTER(condition=Seriescode == 'DT.DOD.DECT.CD':string, columns={'Countrycode': Countrycode}) SCAN(table=main.CountryNotes, columns={'Countrycode': Countrycode, 'Seriescode': Seriescode}) - FILTER(condition=SeriesCode == 'DT.DOD.DECT.CD':string, columns={'SeriesCode': SeriesCode}) - SCAN(table=main.Series, columns={'SeriesCode': SeriesCode}) diff --git a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index 011898756..cc4c44913 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,7 +1,8 @@ WITH _s4 AS ( SELECT partsupp.ps_suppkey, - AVG(part.p_retailprice) AS avg_p_retailprice + SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr, + SUM(part.p_retailprice) AS sum_p_retailprice FROM tpch.supplier AS supplier JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey @@ -18,9 +19,11 @@ FROM _s4 AS _s4 JOIN tpch.partsupp AS partsupp ON _s4.ps_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part - ON _s4.avg_p_retailprice > part.p_retailprice - AND part.p_container = 'LG DRUM' + ON part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey + AND part.p_retailprice < ( + CAST(_s4.sum_p_retailprice AS REAL) / _s4.sum_expr + ) AND part.p_retailprice < ( partsupp.ps_supplycost * 1.5 ) diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index f65eaa980..e93da2ce7 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -6,7 +6,8 @@ WITH _s0 AS ( SELECT partsupp.ps_suppkey, MAX(_s0.avg_p_retailprice) AS anything_avg_p_retailprice, - AVG(part.p_retailprice) AS supplier_avg_price + SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr, + SUM(part.p_retailprice) AS sum_p_retailprice FROM _s0 AS _s0 JOIN tpch.supplier AS supplier ON supplier.s_acctbal < 1000 AND supplier.s_nationkey = 19 @@ -23,9 +24,11 @@ FROM _s6 AS _s6 JOIN tpch.partsupp AS partsupp ON _s6.ps_suppkey = partsupp.ps_suppkey JOIN tpch.part AS part - ON _s6.supplier_avg_price > part.p_retailprice - AND part.p_container = 'LG DRUM' + ON part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey + AND part.p_retailprice < ( + CAST(_s6.sum_p_retailprice AS REAL) / _s6.sum_expr + ) AND part.p_retailprice < ( _s6.anything_avg_p_retailprice * 0.85 ) diff --git a/tests/test_sql_refsols/correl_19_sqlite.sql b/tests/test_sql_refsols/correl_19_sqlite.sql index 8f1892961..795361957 100644 --- a/tests/test_sql_refsols/correl_19_sqlite.sql +++ b/tests/test_sql_refsols/correl_19_sqlite.sql @@ -2,11 +2,9 @@ SELECT MAX(supplier.s_name) AS supplier_name, COUNT(*) AS n_super_cust FROM tpch.supplier AS supplier -JOIN tpch.nation AS nation - ON nation.n_nationkey = supplier.s_nationkey JOIN tpch.customer AS customer ON customer.c_acctbal > supplier.s_acctbal - AND customer.c_nationkey = nation.n_nationkey + AND customer.c_nationkey = supplier.s_nationkey GROUP BY supplier.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/correl_29_sqlite.sql b/tests/test_sql_refsols/correl_29_sqlite.sql index 5ec76c6d8..964e96797 100644 --- a/tests/test_sql_refsols/correl_29_sqlite.sql +++ b/tests/test_sql_refsols/correl_29_sqlite.sql @@ -23,7 +23,7 @@ WITH _t5 AS ( ON _s1.avg_c_acctbal < _s3.c_acctbal AND _s3.c_nationkey = nation.n_nationkey GROUP BY 1 -), _s10 AS ( +), _s8 AS ( SELECT _s5.c_nationkey, MAX(_t3.anything_n_name) AS anything_anything_n_name, @@ -43,7 +43,7 @@ WITH _t5 AS ( s_acctbal, s_nationkey FROM tpch.supplier -), _s7 AS ( +), _s6 AS ( SELECT s_nationkey, AVG(s_acctbal) AS avg_s_acctbal @@ -52,21 +52,19 @@ WITH _t5 AS ( 1 ) SELECT - MAX(_s10.anything_anything_n_regionkey) AS region_key, - MAX(_s10.anything_anything_n_name) AS nation_name, - MAX(_s10.anything_n_rows) AS n_above_avg_customers, + MAX(_s8.anything_anything_n_regionkey) AS region_key, + MAX(_s8.anything_anything_n_name) AS nation_name, + MAX(_s8.anything_n_rows) AS n_above_avg_customers, COUNT(*) AS n_above_avg_suppliers, - MAX(_s10.min_c_acctbal) AS min_cust_acctbal, - MAX(_s10.max_c_acctbal) AS max_cust_acctbal -FROM _s10 AS _s10 -JOIN tpch.nation AS nation - ON _s10.c_nationkey = nation.n_nationkey -JOIN _s7 AS _s7 - ON _s7.s_nationkey = nation.n_nationkey -JOIN _t6 AS _s9 - ON _s7.avg_s_acctbal < _s9.s_acctbal AND _s9.s_nationkey = nation.n_nationkey + MAX(_s8.min_c_acctbal) AS min_cust_acctbal, + MAX(_s8.max_c_acctbal) AS max_cust_acctbal +FROM _s8 AS _s8 +JOIN _s6 AS _s6 + ON _s6.s_nationkey = _s8.c_nationkey +JOIN _t6 AS _s7 + ON _s6.avg_s_acctbal < _s7.s_acctbal AND _s6.s_nationkey = _s7.s_nationkey GROUP BY - nation.n_nationkey + _s6.s_nationkey ORDER BY 1, 2 diff --git a/tests/test_sql_refsols/cryptbank_agg_04_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_agg_04_raw_sqlite.sql index 992e6eccd..92217107e 100644 --- a/tests/test_sql_refsols/cryptbank_agg_04_raw_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_agg_04_raw_sqlite.sql @@ -1,4 +1,4 @@ -WITH _s1 AS ( +WITH _t0 AS ( SELECT a_branchkey, SUM(SQRT(a_balance)) AS sum_unmask_a_balance @@ -7,11 +7,9 @@ WITH _s1 AS ( 1 ) SELECT - branches.b_key AS branch_key, + a_branchkey AS branch_key, ROUND( - CAST(COALESCE(_s1.sum_unmask_a_balance, 0) AS REAL) / SUM(COALESCE(_s1.sum_unmask_a_balance, 0)) OVER (), + CAST(COALESCE(sum_unmask_a_balance, 0) AS REAL) / SUM(COALESCE(sum_unmask_a_balance, 0)) OVER (), 2 ) AS pct_total_wealth -FROM crbnk.branches AS branches -JOIN _s1 AS _s1 - ON _s1.a_branchkey = branches.b_key +FROM _t0 diff --git a/tests/test_sql_refsols/cryptbank_agg_04_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_agg_04_rewrite_sqlite.sql index 992e6eccd..92217107e 100644 --- a/tests/test_sql_refsols/cryptbank_agg_04_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_agg_04_rewrite_sqlite.sql @@ -1,4 +1,4 @@ -WITH _s1 AS ( +WITH _t0 AS ( SELECT a_branchkey, SUM(SQRT(a_balance)) AS sum_unmask_a_balance @@ -7,11 +7,9 @@ WITH _s1 AS ( 1 ) SELECT - branches.b_key AS branch_key, + a_branchkey AS branch_key, ROUND( - CAST(COALESCE(_s1.sum_unmask_a_balance, 0) AS REAL) / SUM(COALESCE(_s1.sum_unmask_a_balance, 0)) OVER (), + CAST(COALESCE(sum_unmask_a_balance, 0) AS REAL) / SUM(COALESCE(sum_unmask_a_balance, 0)) OVER (), 2 ) AS pct_total_wealth -FROM crbnk.branches AS branches -JOIN _s1 AS _s1 - ON _s1.a_branchkey = branches.b_key +FROM _t0 diff --git a/tests/test_sql_refsols/defog_academic_gen1_ansi.sql b/tests/test_sql_refsols/defog_academic_gen1_ansi.sql index 75c2544bc..dcbb778aa 100644 --- a/tests/test_sql_refsols/defog_academic_gen1_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen1_ansi.sql @@ -3,10 +3,8 @@ WITH _t0 AS ( writes.aid, COUNT(DISTINCT domain_publication.did) AS ndistinct_did FROM main.writes AS writes - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name IN ('Data Science', 'Machine Learning') diff --git a/tests/test_sql_refsols/defog_academic_gen1_mysql.sql b/tests/test_sql_refsols/defog_academic_gen1_mysql.sql index 75c2544bc..dcbb778aa 100644 --- a/tests/test_sql_refsols/defog_academic_gen1_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen1_mysql.sql @@ -3,10 +3,8 @@ WITH _t0 AS ( writes.aid, COUNT(DISTINCT domain_publication.did) AS ndistinct_did FROM main.writes AS writes - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name IN ('Data Science', 'Machine Learning') diff --git a/tests/test_sql_refsols/defog_academic_gen1_postgres.sql b/tests/test_sql_refsols/defog_academic_gen1_postgres.sql index 75c2544bc..dcbb778aa 100644 --- a/tests/test_sql_refsols/defog_academic_gen1_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen1_postgres.sql @@ -3,10 +3,8 @@ WITH _t0 AS ( writes.aid, COUNT(DISTINCT domain_publication.did) AS ndistinct_did FROM main.writes AS writes - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name IN ('Data Science', 'Machine Learning') diff --git a/tests/test_sql_refsols/defog_academic_gen1_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen1_snowflake.sql index 75c2544bc..dcbb778aa 100644 --- a/tests/test_sql_refsols/defog_academic_gen1_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen1_snowflake.sql @@ -3,10 +3,8 @@ WITH _t0 AS ( writes.aid, COUNT(DISTINCT domain_publication.did) AS ndistinct_did FROM main.writes AS writes - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name IN ('Data Science', 'Machine Learning') diff --git a/tests/test_sql_refsols/defog_academic_gen1_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen1_sqlite.sql index 75c2544bc..dcbb778aa 100644 --- a/tests/test_sql_refsols/defog_academic_gen1_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen1_sqlite.sql @@ -3,10 +3,8 @@ WITH _t0 AS ( writes.aid, COUNT(DISTINCT domain_publication.did) AS ndistinct_did FROM main.writes AS writes - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name IN ('Data Science', 'Machine Learning') diff --git a/tests/test_sql_refsols/defog_academic_gen21_ansi.sql b/tests/test_sql_refsols/defog_academic_gen21_ansi.sql index 45760b3bf..e28abbbc1 100644 --- a/tests/test_sql_refsols/defog_academic_gen21_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen21_ansi.sql @@ -6,9 +6,7 @@ JOIN main.author AS author ON author.oid = organization.oid JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' diff --git a/tests/test_sql_refsols/defog_academic_gen21_mysql.sql b/tests/test_sql_refsols/defog_academic_gen21_mysql.sql index 4a0901199..e0c5c9c96 100644 --- a/tests/test_sql_refsols/defog_academic_gen21_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen21_mysql.sql @@ -4,10 +4,8 @@ WITH _u_0 AS ( FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' GROUP BY diff --git a/tests/test_sql_refsols/defog_academic_gen21_postgres.sql b/tests/test_sql_refsols/defog_academic_gen21_postgres.sql index 4a0901199..e0c5c9c96 100644 --- a/tests/test_sql_refsols/defog_academic_gen21_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen21_postgres.sql @@ -4,10 +4,8 @@ WITH _u_0 AS ( FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' GROUP BY diff --git a/tests/test_sql_refsols/defog_academic_gen21_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen21_snowflake.sql index 4a0901199..e0c5c9c96 100644 --- a/tests/test_sql_refsols/defog_academic_gen21_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen21_snowflake.sql @@ -4,10 +4,8 @@ WITH _u_0 AS ( FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' GROUP BY diff --git a/tests/test_sql_refsols/defog_academic_gen21_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen21_sqlite.sql index 4a0901199..e0c5c9c96 100644 --- a/tests/test_sql_refsols/defog_academic_gen21_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen21_sqlite.sql @@ -4,10 +4,8 @@ WITH _u_0 AS ( FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid - JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' GROUP BY diff --git a/tests/test_sql_refsols/defog_academic_gen22_ansi.sql b/tests/test_sql_refsols/defog_academic_gen22_ansi.sql index 95538288a..ca70ec1eb 100644 --- a/tests/test_sql_refsols/defog_academic_gen22_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen22_ansi.sql @@ -10,9 +10,7 @@ SELECT FROM main.author AS author JOIN _s0 AS _s0 ON _s0.aid = author.aid -JOIN main.domain AS domain - ON _s0.did = domain.did -JOIN _s0 AS _s3 - ON _s3.did = domain.did +JOIN _s0 AS _s1 + ON _s0.did = _s1.did JOIN main.author AS author_2 - ON LOWER(author_2.name) LIKE '%martin%' AND _s3.aid = author_2.aid + ON LOWER(author_2.name) LIKE '%martin%' AND _s1.aid = author_2.aid diff --git a/tests/test_sql_refsols/defog_academic_gen22_mysql.sql b/tests/test_sql_refsols/defog_academic_gen22_mysql.sql index c60c3d92b..53ec2291e 100644 --- a/tests/test_sql_refsols/defog_academic_gen22_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen22_mysql.sql @@ -7,12 +7,10 @@ WITH _s0 AS ( SELECT _s0.aid AS _u_1 FROM _s0 AS _s0 - JOIN main.domain AS domain - ON _s0.did = domain.did - JOIN _s0 AS _s3 - ON _s3.did = domain.did + JOIN _s0 AS _s1 + ON _s0.did = _s1.did JOIN main.author AS author - ON LOWER(author.name) LIKE '%martin%' AND _s3.aid = author.aid + ON LOWER(author.name) LIKE '%martin%' AND _s1.aid = author.aid GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_academic_gen22_postgres.sql b/tests/test_sql_refsols/defog_academic_gen22_postgres.sql index c60c3d92b..53ec2291e 100644 --- a/tests/test_sql_refsols/defog_academic_gen22_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen22_postgres.sql @@ -7,12 +7,10 @@ WITH _s0 AS ( SELECT _s0.aid AS _u_1 FROM _s0 AS _s0 - JOIN main.domain AS domain - ON _s0.did = domain.did - JOIN _s0 AS _s3 - ON _s3.did = domain.did + JOIN _s0 AS _s1 + ON _s0.did = _s1.did JOIN main.author AS author - ON LOWER(author.name) LIKE '%martin%' AND _s3.aid = author.aid + ON LOWER(author.name) LIKE '%martin%' AND _s1.aid = author.aid GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_academic_gen22_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen22_snowflake.sql index 8cefdff63..071aad418 100644 --- a/tests/test_sql_refsols/defog_academic_gen22_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen22_snowflake.sql @@ -7,12 +7,10 @@ WITH _s0 AS ( SELECT _s0.aid AS _u_1 FROM _s0 AS _s0 - JOIN main.domain AS domain - ON _s0.did = domain.did - JOIN _s0 AS _s3 - ON _s3.did = domain.did + JOIN _s0 AS _s1 + ON _s0.did = _s1.did JOIN main.author AS author - ON CONTAINS(LOWER(author.name), 'martin') AND _s3.aid = author.aid + ON CONTAINS(LOWER(author.name), 'martin') AND _s1.aid = author.aid GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_academic_gen22_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen22_sqlite.sql index c60c3d92b..53ec2291e 100644 --- a/tests/test_sql_refsols/defog_academic_gen22_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen22_sqlite.sql @@ -7,12 +7,10 @@ WITH _s0 AS ( SELECT _s0.aid AS _u_1 FROM _s0 AS _s0 - JOIN main.domain AS domain - ON _s0.did = domain.did - JOIN _s0 AS _s3 - ON _s3.did = domain.did + JOIN _s0 AS _s1 + ON _s0.did = _s1.did JOIN main.author AS author - ON LOWER(author.name) LIKE '%martin%' AND _s3.aid = author.aid + ON LOWER(author.name) LIKE '%martin%' AND _s1.aid = author.aid GROUP BY 1 ) diff --git a/tests/test_sql_refsols/defog_academic_gen24_ansi.sql b/tests/test_sql_refsols/defog_academic_gen24_ansi.sql index bf25d3345..709d36693 100644 --- a/tests/test_sql_refsols/defog_academic_gen24_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen24_ansi.sql @@ -1,12 +1,10 @@ -WITH _s9 AS ( +WITH _s7 AS ( SELECT domain_conference.cid, writes.pid FROM main.writes AS writes - JOIN main.author AS author - ON author.aid = writes.aid JOIN main.domain_author AS domain_author - ON author.aid = domain_author.aid + ON domain_author.aid = writes.aid JOIN main.domain AS domain ON LOWER(domain.name) LIKE '%sociology%' AND domain.did = domain_author.did JOIN main.domain_conference AS domain_conference @@ -15,7 +13,7 @@ WITH _s9 AS ( SELECT publication.title FROM main.publication AS publication -JOIN _s9 AS _s9 - ON _s9.cid = publication.cid AND _s9.pid = publication.pid +JOIN _s7 AS _s7 + ON _s7.cid = publication.cid AND _s7.pid = publication.pid WHERE publication.year = 2020 diff --git a/tests/test_sql_refsols/defog_academic_gen24_mysql.sql b/tests/test_sql_refsols/defog_academic_gen24_mysql.sql index ebc20c922..8b9e27d18 100644 --- a/tests/test_sql_refsols/defog_academic_gen24_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen24_mysql.sql @@ -3,10 +3,8 @@ WITH _u_0 AS ( domain_conference.cid AS _u_1, writes.pid AS _u_2 FROM main.writes AS writes - JOIN main.author AS author - ON author.aid = writes.aid JOIN main.domain_author AS domain_author - ON author.aid = domain_author.aid + ON domain_author.aid = writes.aid JOIN main.domain AS domain ON LOWER(domain.name) LIKE '%sociology%' AND domain.did = domain_author.did JOIN main.domain_conference AS domain_conference diff --git a/tests/test_sql_refsols/defog_academic_gen24_postgres.sql b/tests/test_sql_refsols/defog_academic_gen24_postgres.sql index ebc20c922..8b9e27d18 100644 --- a/tests/test_sql_refsols/defog_academic_gen24_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen24_postgres.sql @@ -3,10 +3,8 @@ WITH _u_0 AS ( domain_conference.cid AS _u_1, writes.pid AS _u_2 FROM main.writes AS writes - JOIN main.author AS author - ON author.aid = writes.aid JOIN main.domain_author AS domain_author - ON author.aid = domain_author.aid + ON domain_author.aid = writes.aid JOIN main.domain AS domain ON LOWER(domain.name) LIKE '%sociology%' AND domain.did = domain_author.did JOIN main.domain_conference AS domain_conference diff --git a/tests/test_sql_refsols/defog_academic_gen24_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen24_snowflake.sql index e9c898c8f..1a191d3e8 100644 --- a/tests/test_sql_refsols/defog_academic_gen24_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen24_snowflake.sql @@ -3,10 +3,8 @@ WITH _u_0 AS ( domain_conference.cid AS _u_1, writes.pid AS _u_2 FROM main.writes AS writes - JOIN main.author AS author - ON author.aid = writes.aid JOIN main.domain_author AS domain_author - ON author.aid = domain_author.aid + ON domain_author.aid = writes.aid JOIN main.domain AS domain ON CONTAINS(LOWER(domain.name), 'sociology') AND domain.did = domain_author.did JOIN main.domain_conference AS domain_conference diff --git a/tests/test_sql_refsols/defog_academic_gen24_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen24_sqlite.sql index ebc20c922..8b9e27d18 100644 --- a/tests/test_sql_refsols/defog_academic_gen24_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen24_sqlite.sql @@ -3,10 +3,8 @@ WITH _u_0 AS ( domain_conference.cid AS _u_1, writes.pid AS _u_2 FROM main.writes AS writes - JOIN main.author AS author - ON author.aid = writes.aid JOIN main.domain_author AS domain_author - ON author.aid = domain_author.aid + ON domain_author.aid = writes.aid JOIN main.domain AS domain ON LOWER(domain.name) LIKE '%sociology%' AND domain.did = domain_author.did JOIN main.domain_conference AS domain_conference diff --git a/tests/test_sql_refsols/defog_academic_gen25_ansi.sql b/tests/test_sql_refsols/defog_academic_gen25_ansi.sql index 96d7e1db7..41c74bcd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen25_ansi.sql +++ b/tests/test_sql_refsols/defog_academic_gen25_ansi.sql @@ -3,9 +3,7 @@ SELECT DISTINCT FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/defog_academic_gen25_mysql.sql b/tests/test_sql_refsols/defog_academic_gen25_mysql.sql index 96d7e1db7..41c74bcd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen25_mysql.sql +++ b/tests/test_sql_refsols/defog_academic_gen25_mysql.sql @@ -3,9 +3,7 @@ SELECT DISTINCT FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/defog_academic_gen25_postgres.sql b/tests/test_sql_refsols/defog_academic_gen25_postgres.sql index 96d7e1db7..41c74bcd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen25_postgres.sql +++ b/tests/test_sql_refsols/defog_academic_gen25_postgres.sql @@ -3,9 +3,7 @@ SELECT DISTINCT FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/defog_academic_gen25_snowflake.sql b/tests/test_sql_refsols/defog_academic_gen25_snowflake.sql index 96d7e1db7..41c74bcd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen25_snowflake.sql +++ b/tests/test_sql_refsols/defog_academic_gen25_snowflake.sql @@ -3,9 +3,7 @@ SELECT DISTINCT FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/defog_academic_gen25_sqlite.sql b/tests/test_sql_refsols/defog_academic_gen25_sqlite.sql index 96d7e1db7..41c74bcd0 100644 --- a/tests/test_sql_refsols/defog_academic_gen25_sqlite.sql +++ b/tests/test_sql_refsols/defog_academic_gen25_sqlite.sql @@ -3,9 +3,7 @@ SELECT DISTINCT FROM main.author AS author JOIN main.writes AS writes ON author.aid = writes.aid -JOIN main.publication AS publication - ON publication.pid = writes.pid JOIN main.domain_publication AS domain_publication - ON domain_publication.pid = publication.pid + ON domain_publication.pid = writes.pid JOIN main.domain AS domain ON domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv6_ansi.sql index f7d720393..25888775b 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv6_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv6_ansi.sql @@ -7,8 +7,6 @@ WITH _t0 AS ( ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY updated_at DESC NULLS FIRST) = 1 ) SELECT - users.uid AS user_id, - _t0.balance AS latest_balance -FROM main.users AS users -JOIN _t0 AS _t0 - ON _t0.user_id = users.uid + user_id, + balance AS latest_balance +FROM _t0 diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_mysql.sql b/tests/test_sql_refsols/defog_ewallet_adv6_mysql.sql index 617ba93ba..3b104b008 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv6_mysql.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv6_mysql.sql @@ -6,8 +6,8 @@ WITH _t AS ( FROM main.wallet_user_balance_daily ) SELECT - users.uid AS user_id, - _t.balance AS latest_balance -FROM main.users AS users -JOIN _t AS _t - ON _t._w = 1 AND _t.user_id = users.uid + user_id, + balance AS latest_balance +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_postgres.sql b/tests/test_sql_refsols/defog_ewallet_adv6_postgres.sql index 056bbf172..1dec440e4 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv6_postgres.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv6_postgres.sql @@ -6,8 +6,8 @@ WITH _t AS ( FROM main.wallet_user_balance_daily ) SELECT - users.uid AS user_id, - _t.balance AS latest_balance -FROM main.users AS users -JOIN _t AS _t - ON _t._w = 1 AND _t.user_id = users.uid + user_id, + balance AS latest_balance +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql index facbf73f3..8c42869d6 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql @@ -7,8 +7,6 @@ WITH _t0 AS ( ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY updated_at DESC) = 1 ) SELECT - users.uid AS user_id, - _t0.balance AS latest_balance -FROM main.users AS users -JOIN _t0 AS _t0 - ON _t0.user_id = users.uid + user_id, + balance AS latest_balance +FROM _t0 diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv6_sqlite.sql index 056bbf172..1dec440e4 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv6_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv6_sqlite.sql @@ -6,8 +6,8 @@ WITH _t AS ( FROM main.wallet_user_balance_daily ) SELECT - users.uid AS user_id, - _t.balance AS latest_balance -FROM main.users AS users -JOIN _t AS _t - ON _t._w = 1 AND _t.user_id = users.uid + user_id, + balance AS latest_balance +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_ansi.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_ansi.sql index a37de8ff0..c23e12993 100644 --- a/tests/test_sql_refsols/epoch_num_predawn_cold_war_ansi.sql +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_ansi.sql @@ -5,7 +5,7 @@ WITH _s0 AS ( FROM events ) SELECT - COUNT(*) AS n_events + COUNT(DISTINCT _s0.ev_key) AS n_events FROM _s0 AS _s0 JOIN times AS times ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s0.ev_dt AS DATETIME)) diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_mysql.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_mysql.sql index efc1d7023..80a0ac2b4 100644 --- a/tests/test_sql_refsols/epoch_num_predawn_cold_war_mysql.sql +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_mysql.sql @@ -15,7 +15,7 @@ WITH _s0 AS ( 1 ) SELECT - COUNT(*) AS n_events + COUNT(DISTINCT _s0.ev_key) AS n_events FROM _s0 AS _s0 JOIN TIMES AS TIMES ON TIMES.t_end_hour > HOUR(_s0.ev_dt) diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_postgres.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_postgres.sql index c1deb50ea..0c4383f57 100644 --- a/tests/test_sql_refsols/epoch_num_predawn_cold_war_postgres.sql +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_postgres.sql @@ -15,7 +15,7 @@ WITH _s0 AS ( 1 ) SELECT - COUNT(*) AS n_events + COUNT(DISTINCT _s0.ev_key) AS n_events FROM _s0 AS _s0 JOIN times AS times ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s0.ev_dt AS TIMESTAMP)) diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql index 5e8914950..5aae6c921 100644 --- a/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql @@ -15,7 +15,7 @@ WITH _s0 AS ( 1 ) SELECT - COUNT(*) AS n_events + COUNT(DISTINCT _s0.ev_key) AS n_events FROM _s0 AS _s0 JOIN times AS times ON times.t_end_hour > HOUR(CAST(_s0.ev_dt AS TIMESTAMP)) diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_sqlite.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_sqlite.sql index db07934cd..4562f3a60 100644 --- a/tests/test_sql_refsols/epoch_num_predawn_cold_war_sqlite.sql +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_sqlite.sql @@ -15,7 +15,7 @@ WITH _s0 AS ( 1 ) SELECT - COUNT(*) AS n_events + COUNT(DISTINCT _s0.ev_key) AS n_events FROM _s0 AS _s0 JOIN times AS times ON times.t_end_hour > CAST(STRFTIME('%H', _s0.ev_dt) AS INTEGER) diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_ansi.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_ansi.sql index 1149447fd..91fa105d2 100644 --- a/tests/test_sql_refsols/epoch_unique_users_per_engine_ansi.sql +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_ansi.sql @@ -1,25 +1,23 @@ -WITH _s2 AS ( +WITH _s0 AS ( SELECT DISTINCT search_engine FROM searches -), _s3 AS ( +), _s1 AS ( SELECT - searches.search_engine, - COUNT(DISTINCT users.user_id) AS ndistinct_user_id - FROM searches AS searches - JOIN users AS users - ON searches.search_user_id = users.user_id + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM searches WHERE - EXTRACT(YEAR FROM CAST(searches.search_ts AS DATETIME)) <= 2019 - AND EXTRACT(YEAR FROM CAST(searches.search_ts AS DATETIME)) >= 2010 + EXTRACT(YEAR FROM CAST(search_ts AS DATETIME)) <= 2019 + AND EXTRACT(YEAR FROM CAST(search_ts AS DATETIME)) >= 2010 GROUP BY 1 ) SELECT - _s2.search_engine AS engine, - COALESCE(_s3.ndistinct_user_id, 0) AS n_users -FROM _s2 AS _s2 -LEFT JOIN _s3 AS _s3 - ON _s2.search_engine = _s3.search_engine + _s0.search_engine AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine ORDER BY 1 diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_mysql.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_mysql.sql index ba9b2af64..20ffd6916 100644 --- a/tests/test_sql_refsols/epoch_unique_users_per_engine_mysql.sql +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_mysql.sql @@ -1,25 +1,23 @@ -WITH _s2 AS ( +WITH _s0 AS ( SELECT DISTINCT search_engine FROM SEARCHES -), _s3 AS ( +), _s1 AS ( SELECT - SEARCHES.search_engine, - COUNT(DISTINCT USERS.user_id) AS ndistinct_user_id - FROM SEARCHES AS SEARCHES - JOIN USERS AS USERS - ON SEARCHES.search_user_id = USERS.user_id + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM SEARCHES WHERE - EXTRACT(YEAR FROM CAST(SEARCHES.search_ts AS DATETIME)) <= 2019 - AND EXTRACT(YEAR FROM CAST(SEARCHES.search_ts AS DATETIME)) >= 2010 + EXTRACT(YEAR FROM CAST(search_ts AS DATETIME)) <= 2019 + AND EXTRACT(YEAR FROM CAST(search_ts AS DATETIME)) >= 2010 GROUP BY 1 ) SELECT - _s2.search_engine COLLATE utf8mb4_bin AS engine, - COALESCE(_s3.ndistinct_user_id, 0) AS n_users -FROM _s2 AS _s2 -LEFT JOIN _s3 AS _s3 - ON _s2.search_engine = _s3.search_engine + _s0.search_engine COLLATE utf8mb4_bin AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine ORDER BY 1 diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_postgres.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_postgres.sql index 7d12c81e0..2cd5d513e 100644 --- a/tests/test_sql_refsols/epoch_unique_users_per_engine_postgres.sql +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_postgres.sql @@ -1,25 +1,23 @@ -WITH _s2 AS ( +WITH _s0 AS ( SELECT DISTINCT search_engine FROM searches -), _s3 AS ( +), _s1 AS ( SELECT - searches.search_engine, - COUNT(DISTINCT users.user_id) AS ndistinct_user_id - FROM searches AS searches - JOIN users AS users - ON searches.search_user_id = users.user_id + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM searches WHERE - EXTRACT(YEAR FROM CAST(searches.search_ts AS TIMESTAMP)) <= 2019 - AND EXTRACT(YEAR FROM CAST(searches.search_ts AS TIMESTAMP)) >= 2010 + EXTRACT(YEAR FROM CAST(search_ts AS TIMESTAMP)) <= 2019 + AND EXTRACT(YEAR FROM CAST(search_ts AS TIMESTAMP)) >= 2010 GROUP BY 1 ) SELECT - _s2.search_engine AS engine, - COALESCE(_s3.ndistinct_user_id, 0) AS n_users -FROM _s2 AS _s2 -LEFT JOIN _s3 AS _s3 - ON _s2.search_engine = _s3.search_engine + _s0.search_engine AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql index 971e806fc..b9bcb8e5b 100644 --- a/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql @@ -1,25 +1,23 @@ -WITH _s2 AS ( +WITH _s0 AS ( SELECT DISTINCT search_engine FROM searches -), _s3 AS ( +), _s1 AS ( SELECT - searches.search_engine, - COUNT(DISTINCT users.user_id) AS ndistinct_user_id - FROM searches AS searches - JOIN users AS users - ON searches.search_user_id = users.user_id + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM searches WHERE - YEAR(CAST(searches.search_ts AS TIMESTAMP)) <= 2019 - AND YEAR(CAST(searches.search_ts AS TIMESTAMP)) >= 2010 + YEAR(CAST(search_ts AS TIMESTAMP)) <= 2019 + AND YEAR(CAST(search_ts AS TIMESTAMP)) >= 2010 GROUP BY 1 ) SELECT - _s2.search_engine AS engine, - COALESCE(_s3.ndistinct_user_id, 0) AS n_users -FROM _s2 AS _s2 -LEFT JOIN _s3 AS _s3 - ON _s2.search_engine = _s3.search_engine + _s0.search_engine AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine ORDER BY 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_sqlite.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_sqlite.sql index 0c6965b9f..44bc84f50 100644 --- a/tests/test_sql_refsols/epoch_unique_users_per_engine_sqlite.sql +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_sqlite.sql @@ -1,25 +1,23 @@ -WITH _s2 AS ( +WITH _s0 AS ( SELECT DISTINCT search_engine FROM searches -), _s3 AS ( +), _s1 AS ( SELECT - searches.search_engine, - COUNT(DISTINCT users.user_id) AS ndistinct_user_id - FROM searches AS searches - JOIN users AS users - ON searches.search_user_id = users.user_id + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM searches WHERE - CAST(STRFTIME('%Y', searches.search_ts) AS INTEGER) <= 2019 - AND CAST(STRFTIME('%Y', searches.search_ts) AS INTEGER) >= 2010 + CAST(STRFTIME('%Y', search_ts) AS INTEGER) <= 2019 + AND CAST(STRFTIME('%Y', search_ts) AS INTEGER) >= 2010 GROUP BY 1 ) SELECT - _s2.search_engine AS engine, - COALESCE(_s3.ndistinct_user_id, 0) AS n_users -FROM _s2 AS _s2 -LEFT JOIN _s3 AS _s3 - ON _s2.search_engine = _s3.search_engine + _s0.search_engine AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine ORDER BY 1 diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index a53a1e9b5..c95b9b937 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -25,36 +25,28 @@ WITH _t5 AS ( 2, 3 ), _s11 AS ( - SELECT - _t6.l_linenumber, - _t6.l_orderkey, - orders.o_orderkey - FROM _t5 AS _t6 - JOIN tpch.orders AS orders - ON _t6.l_orderkey = orders.o_orderkey - JOIN tpch.lineitem AS lineitem - ON _t6.l_suppkey <> lineitem.l_suppkey - AND lineitem.l_commitdate < lineitem.l_receiptdate - AND lineitem.l_orderkey = orders.o_orderkey -), _s13 AS ( SELECT _t3.anything_l_suppkey FROM _t3 AS _t3 - JOIN _s11 AS _s11 - ON _s11.l_linenumber = _t3.l_linenumber - AND _s11.l_orderkey = _t3.l_orderkey - AND _s11.o_orderkey = _t3.o_orderkey + JOIN _t5 AS _t6 + ON _t3.l_linenumber = _t6.l_linenumber + AND _t3.l_orderkey = _t6.l_orderkey + AND _t3.o_orderkey = _t6.l_orderkey + JOIN tpch.lineitem AS lineitem + ON _t6.l_orderkey = lineitem.l_orderkey + AND _t6.l_suppkey <> lineitem.l_suppkey + AND lineitem.l_commitdate < lineitem.l_receiptdate WHERE _t3.anything_o_orderstatus = 'F' ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COUNT(_s11.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey +LEFT JOIN _s11 AS _s11 + ON _s11.anything_l_suppkey = supplier.s_suppkey GROUP BY supplier.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/tpch_q21_mysql.sql b/tests/test_sql_refsols/tpch_q21_mysql.sql index 302c196ca..7a70299a0 100644 --- a/tests/test_sql_refsols/tpch_q21_mysql.sql +++ b/tests/test_sql_refsols/tpch_q21_mysql.sql @@ -27,38 +27,34 @@ WITH _t5 AS ( ), _u_0 AS ( SELECT _t6.l_linenumber AS _u_1, - _t6.l_orderkey AS _u_2, - ORDERS.o_orderkey AS _u_3 + _t6.l_orderkey AS _u_2 FROM _t5 AS _t6 - JOIN tpch.ORDERS AS ORDERS - ON ORDERS.o_orderkey = _t6.l_orderkey JOIN tpch.LINEITEM AS LINEITEM ON LINEITEM.l_commitdate < LINEITEM.l_receiptdate - AND LINEITEM.l_orderkey = ORDERS.o_orderkey + AND LINEITEM.l_orderkey = _t6.l_orderkey AND LINEITEM.l_suppkey <> _t6.l_suppkey GROUP BY 1, - 2, - 3 -), _s13 AS ( + 2 +), _s11 AS ( SELECT _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 AND _t3.l_orderkey = _u_0._u_2 - AND _t3.o_orderkey = _u_0._u_3 + AND _t3.o_orderkey = _u_0._u_2 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL ) SELECT ANY_VALUE(SUPPLIER.s_name) COLLATE utf8mb4_bin AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COUNT(_s11.anything_l_suppkey) AS NUMWAIT FROM tpch.SUPPLIER AS SUPPLIER JOIN tpch.NATION AS NATION ON NATION.n_name = 'SAUDI ARABIA' AND NATION.n_nationkey = SUPPLIER.s_nationkey -LEFT JOIN _s13 AS _s13 - ON SUPPLIER.s_suppkey = _s13.anything_l_suppkey +LEFT JOIN _s11 AS _s11 + ON SUPPLIER.s_suppkey = _s11.anything_l_suppkey GROUP BY SUPPLIER.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/tpch_q21_postgres.sql b/tests/test_sql_refsols/tpch_q21_postgres.sql index 79c4527cd..4231c83c5 100644 --- a/tests/test_sql_refsols/tpch_q21_postgres.sql +++ b/tests/test_sql_refsols/tpch_q21_postgres.sql @@ -27,38 +27,34 @@ WITH _t5 AS ( ), _u_0 AS ( SELECT _t6.l_linenumber AS _u_1, - _t6.l_orderkey AS _u_2, - orders.o_orderkey AS _u_3 + _t6.l_orderkey AS _u_2 FROM _t5 AS _t6 - JOIN tpch.orders AS orders - ON _t6.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t6.l_suppkey <> lineitem.l_suppkey + ON _t6.l_orderkey = lineitem.l_orderkey + AND _t6.l_suppkey <> lineitem.l_suppkey AND lineitem.l_commitdate < lineitem.l_receiptdate - AND lineitem.l_orderkey = orders.o_orderkey GROUP BY 1, - 2, - 3 -), _s13 AS ( + 2 +), _s11 AS ( SELECT _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 AND _t3.l_orderkey = _u_0._u_2 - AND _t3.o_orderkey = _u_0._u_3 + AND _t3.o_orderkey = _u_0._u_2 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL ) SELECT MAX(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COUNT(_s11.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey +LEFT JOIN _s11 AS _s11 + ON _s11.anything_l_suppkey = supplier.s_suppkey GROUP BY supplier.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/tpch_q21_snowflake.sql b/tests/test_sql_refsols/tpch_q21_snowflake.sql index aa6a0445b..9e97a650e 100644 --- a/tests/test_sql_refsols/tpch_q21_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q21_snowflake.sql @@ -27,38 +27,34 @@ WITH _t5 AS ( ), _u_0 AS ( SELECT _t6.l_linenumber AS _u_1, - _t6.l_orderkey AS _u_2, - orders.o_orderkey AS _u_3 + _t6.l_orderkey AS _u_2 FROM _t5 AS _t6 - JOIN tpch.orders AS orders - ON _t6.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t6.l_suppkey <> lineitem.l_suppkey + ON _t6.l_orderkey = lineitem.l_orderkey + AND _t6.l_suppkey <> lineitem.l_suppkey AND lineitem.l_commitdate < lineitem.l_receiptdate - AND lineitem.l_orderkey = orders.o_orderkey GROUP BY 1, - 2, - 3 -), _s13 AS ( + 2 +), _s11 AS ( SELECT _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 AND _t3.l_orderkey = _u_0._u_2 - AND _t3.o_orderkey = _u_0._u_3 + AND _t3.o_orderkey = _u_0._u_2 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL ) SELECT ANY_VALUE(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COUNT(_s11.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey +LEFT JOIN _s11 AS _s11 + ON _s11.anything_l_suppkey = supplier.s_suppkey GROUP BY supplier.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/tpch_q21_sqlite.sql b/tests/test_sql_refsols/tpch_q21_sqlite.sql index c5ceb7d67..6d8bb96d5 100644 --- a/tests/test_sql_refsols/tpch_q21_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q21_sqlite.sql @@ -27,38 +27,34 @@ WITH _t5 AS ( ), _u_0 AS ( SELECT _t6.l_linenumber AS _u_1, - _t6.l_orderkey AS _u_2, - orders.o_orderkey AS _u_3 + _t6.l_orderkey AS _u_2 FROM _t5 AS _t6 - JOIN tpch.orders AS orders - ON _t6.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t6.l_suppkey <> lineitem.l_suppkey + ON _t6.l_orderkey = lineitem.l_orderkey + AND _t6.l_suppkey <> lineitem.l_suppkey AND lineitem.l_commitdate < lineitem.l_receiptdate - AND lineitem.l_orderkey = orders.o_orderkey GROUP BY 1, - 2, - 3 -), _s13 AS ( + 2 +), _s11 AS ( SELECT _t3.anything_l_suppkey FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 ON _t3.l_linenumber = _u_0._u_1 AND _t3.l_orderkey = _u_0._u_2 - AND _t3.o_orderkey = _u_0._u_3 + AND _t3.o_orderkey = _u_0._u_2 WHERE _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL ) SELECT MAX(supplier.s_name) AS S_NAME, - COUNT(_s13.anything_l_suppkey) AS NUMWAIT + COUNT(_s11.anything_l_suppkey) AS NUMWAIT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey -LEFT JOIN _s13 AS _s13 - ON _s13.anything_l_suppkey = supplier.s_suppkey +LEFT JOIN _s11 AS _s11 + ON _s11.anything_l_suppkey = supplier.s_suppkey GROUP BY supplier.s_suppkey ORDER BY diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql index 6dd7b2a24..6d13fe38e 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql @@ -3,8 +3,6 @@ SELECT FROM main.country AS country JOIN main.countrynotes AS countrynotes ON country.countrycode = countrynotes.countrycode -JOIN main.series AS series - ON countrynotes.seriescode = series.seriescode - AND series.seriescode = 'DT.DOD.DECT.CD' + AND countrynotes.seriescode = 'DT.DOD.DECT.CD' WHERE country.incomegroup = 'Low income' diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql index 49b7dd612..1ebf23787 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql @@ -1,10 +1,9 @@ WITH _u_0 AS ( SELECT - CountryNotes.countrycode AS _u_1 - FROM main.CountryNotes AS CountryNotes - JOIN main.Series AS Series - ON CountryNotes.seriescode = Series.seriescode - AND Series.seriescode = 'DT.DOD.DECT.CD' + countrycode AS _u_1 + FROM main.CountryNotes + WHERE + seriescode = 'DT.DOD.DECT.CD' GROUP BY 1 ) diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql index fcb2cde21..dbad26bb8 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql @@ -1,10 +1,9 @@ WITH _u_0 AS ( SELECT - countrynotes.countrycode AS _u_1 - FROM main.countrynotes AS countrynotes - JOIN main.series AS series - ON countrynotes.seriescode = series.seriescode - AND series.seriescode = 'DT.DOD.DECT.CD' + countrycode AS _u_1 + FROM main.countrynotes + WHERE + seriescode = 'DT.DOD.DECT.CD' GROUP BY 1 ) diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql index fcb2cde21..dbad26bb8 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql @@ -1,10 +1,9 @@ WITH _u_0 AS ( SELECT - countrynotes.countrycode AS _u_1 - FROM main.countrynotes AS countrynotes - JOIN main.series AS series - ON countrynotes.seriescode = series.seriescode - AND series.seriescode = 'DT.DOD.DECT.CD' + countrycode AS _u_1 + FROM main.countrynotes + WHERE + seriescode = 'DT.DOD.DECT.CD' GROUP BY 1 ) diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql index fcb2cde21..dbad26bb8 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql @@ -1,10 +1,9 @@ WITH _u_0 AS ( SELECT - countrynotes.countrycode AS _u_1 - FROM main.countrynotes AS countrynotes - JOIN main.series AS series - ON countrynotes.seriescode = series.seriescode - AND series.seriescode = 'DT.DOD.DECT.CD' + countrycode AS _u_1 + FROM main.countrynotes + WHERE + seriescode = 'DT.DOD.DECT.CD' GROUP BY 1 )