-
Notifications
You must be signed in to change notification settings - Fork 3
Delete redundant joins by substitution equi-join keys for their mirror to render one side pruneable #450
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Delete redundant joins by substitution equi-join keys for their mirror to render one side pruneable #450
Changes from all commits
9ac45d2
8728487
675421c
097b8a0
18ba964
684b5d7
cc004ec
de7d4e6
e6dea43
3827276
1fff8ea
07136d2
922d356
a678974
602f292
d7ec696
d88cdb5
665a9dd
d1fe25b
0892581
a69d764
13d9844
c497127
f74fc5c
d260428
856e1a9
c4298cb
5e9f09d
c45b4f2
6a7bc49
416fbad
b121c31
05f7147
2697b10
94726ff
e21cf57
44fdd33
a53500d
b5d90f2
0128758
c9a5fe1
d6ed6c6
ebf5339
19d8fe5
80a9ca0
e812143
993553f
85e7c8f
0508822
f59f457
b34f6ca
7884341
b281c8a
78f6e78
e8a54b8
40805f7
6121806
b8a7f77
bc5f383
a828aa9
2914f9b
9f0961d
2e30300
75c3b7c
b94e76d
58cb511
7347536
19a0fb8
bf69fe8
c5fdfef
716782f
60be207
689a4b0
7dced8e
ade8f35
3d9167e
3d17cad
df2e401
e1ae265
6f27a23
2bbf925
1005fde
d8b2fa8
ce7e035
e6c9fbe
94375ce
4914784
8f0fbd3
f150cd5
8d0fc6b
22a94ab
a971676
9c6caa2
ffb58b3
95e59d1
604a7a6
b63c5d4
02c24bd
2c65773
cc12363
5f19dcd
6ec13f1
9344c9a
0e7f48e
75ee111
59850bd
5ab03d7
f460cda
315993d
0fa39c7
4247700
b27b96f
4d899b8
e8fd112
7587597
484decf
7bf3268
cb96cdb
63682a4
a34ec87
65697e8
8f7fbbe
d797c43
3041ac9
efa1335
48e5d3d
b4e3318
091a353
29dd27d
6d4e268
d903c9d
f130bc3
57bb56e
401c1bc
e148c2e
b987f30
d674b0d
4206914
2d22b2a
a9ac04c
a5c6160
4ad8255
ee93d51
bd036f7
7d8ac06
036230a
712d045
1b43fbd
88e74b2
473cc24
ff71831
def9109
3274831
0bae74c
b7fa6e4
6a11435
8991a3e
eb17fe6
cbc5408
f7091d8
2da07fe
0842267
115a926
1d24b15
11e6006
0f9f098
fd026b2
d91d799
6b4313f
d439434
844eb3d
a13bb6a
84264cc
0a65503
7bbc1f3
27696b8
2cdf361
34d6abb
9133bcb
4964df7
f9c07fe
586d7e0
25f5904
b86c5dc
1bca6af
07e6ab8
766604d
a538bb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| """ | ||
| Logic for switching references to join keys from one side of a join to the other | ||
| when certain conditions are met, thus allowing the join to be removed by the | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we add, at a really high level, what these conditions are? |
||
| column pruner. The conditions are: | ||
| - The join is an inner join. | ||
| - The join has equi-join keys. | ||
| - The cardinality in either direction is singular-access. | ||
| - The only columns used from one side of the join (the one being referenced in | ||
| a singular-access manner) are the join keys (or a subset thereof). | ||
| """ | ||
|
|
||
| from pydough.relational import ( | ||
| ColumnReference, | ||
| ColumnReferenceFinder, | ||
| Join, | ||
| JoinCardinality, | ||
| JoinType, | ||
| RelationalExpression, | ||
| RelationalNode, | ||
| RelationalShuttle, | ||
| ) | ||
| from pydough.relational.rel_util import ( | ||
| apply_substitution, | ||
| extract_equijoin_keys, | ||
| ) | ||
|
|
||
|
|
||
| class JoinKeySubstitutionShuttle(RelationalShuttle): | ||
| """ | ||
| The relational shuttle that performs join key substitution optimization. | ||
| """ | ||
|
|
||
| def visit_join(self, join: Join) -> RelationalNode: | ||
| # Build up a mapping of join key substitutions mapping input columns | ||
| # from one input to another when the optimization case is detected: | ||
| # requires an inner join with equi-join keys. | ||
| join_substitution: dict[RelationalExpression, RelationalExpression] = {} | ||
| if join.join_type == JoinType.INNER: | ||
| lhs_keys_list, rhs_keys_list = extract_equijoin_keys(join) | ||
| if len(lhs_keys_list) > 0 and len(rhs_keys_list) > 0: | ||
| # Identify which columns are used by the join columns that come | ||
| # from the left and right inputs. | ||
| lhs_keys: set[ColumnReference] = set(lhs_keys_list) | ||
| rhs_keys: set[ColumnReference] = set(rhs_keys_list) | ||
| col_finder = ColumnReferenceFinder() | ||
| for value in join.columns.values(): | ||
| value.accept(col_finder) | ||
| col_refs: set[ColumnReference] = col_finder.get_column_references() | ||
| lhs_refs = { | ||
| ref | ||
| for ref in col_refs | ||
| if ref.input_name == join.default_input_aliases[0] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For my understanding, how the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, but this is using |
||
| } | ||
| rhs_refs = col_refs - lhs_refs | ||
| # If each row on the left side (LHS) matches exactly one row on the right | ||
| # side (RHS), i.e. the join is singular-access, and the only RHS columns | ||
| # used by the join's output columns are join keys, then we can substitute | ||
| # the RHS join keys with the corresponding LHS join keys. This potentially | ||
| # allows the join to be removed later, since the RHS adds no new data. | ||
| if ( | ||
| join.cardinality == JoinCardinality.SINGULAR_ACCESS | ||
| and rhs_refs <= rhs_keys | ||
| ): | ||
| for lhs_key, rhs_key in zip(lhs_keys_list, rhs_keys_list): | ||
| join_substitution[rhs_key] = lhs_key | ||
|
|
||
| # If the right side is singular access, and all the columns used | ||
| # from the left side are just the join keys, then we can | ||
| # substitute the left join keys with the right join keys. | ||
| elif ( | ||
| join.reverse_cardinality == JoinCardinality.SINGULAR_ACCESS | ||
| and lhs_refs <= lhs_keys | ||
| ): | ||
| for lhs_key, rhs_key in zip(lhs_keys_list, rhs_keys_list): | ||
| join_substitution[lhs_key] = rhs_key | ||
|
|
||
| # If any substitutions were identified, create a new Join node | ||
| # with the substitutions applied to its columns. | ||
| if len(join_substitution) > 0: | ||
| join = Join( | ||
| join.inputs, | ||
| join.condition, | ||
| join.join_type, | ||
| { | ||
| name: apply_substitution(expr, join_substitution, {}) | ||
| for name, expr in join.columns.items() | ||
| }, | ||
| join.cardinality, | ||
| join.reverse_cardinality, | ||
| join.correl_name, | ||
| ) | ||
|
|
||
| # Recursively visit the inputs to the join to transform them as well. | ||
| return super().visit_join(join) | ||
|
|
||
|
|
||
| def join_key_substitution(root: RelationalNode) -> RelationalNode: | ||
| """ | ||
| The main entry point for join key substitution optimization. | ||
|
|
||
| Args: | ||
| `root`: The root of the relational tree being optimized. | ||
|
|
||
| Returns: | ||
| The optimized relational tree. | ||
| """ | ||
| shuttle: JoinKeySubstitutionShuttle = JoinKeySubstitutionShuttle() | ||
| return root.accept_shuttle(shuttle) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why this change?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was a bug in the previous version that I happened to notice. This function checks whether a hybrid tree is singular with regard to its parent context, which is true if the current level is singular and all levels above it are also singular.