Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
499c000
attempt 1
hadia206 Jul 15, 2025
c192a6e
add collection back
hadia206 Jul 16, 2025
77f63d1
Merge branch 'main' of https://github.com/bodo-ai/PyDough into Hadia/…
hadia206 Jul 16, 2025
721eecc
range collections base, initial try
hadia206 Jul 14, 2025
4a53e01
add range_collection to pydough top
hadia206 Jul 15, 2025
d0ce87f
add test
hadia206 Jul 15, 2025
3c5ed66
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 16, 2025
8893d79
address comments
hadia206 Jul 16, 2025
a7469d3
merge conflict
hadia206 Jul 16, 2025
9e66de2
update test
hadia206 Jul 16, 2025
025b556
make range inside UnqualifiedGeneratedCollection
hadia206 Jul 16, 2025
29e1e74
[run CI] hybrid and execute
hadia206 Jul 18, 2025
dc3c686
fix uniqueness, singular, always exists, and add another test
hadia206 Jul 18, 2025
a626dc1
[run CI] more tests and fix empty table
hadia206 Jul 19, 2025
8491cc6
add other tests (skipped as they're not passing)
hadia206 Jul 22, 2025
6315f3d
Fixing test bugs
knassre-bodo Jul 22, 2025
d4230e4
Fixing hybrid/qualification/conversion bugs
knassre-bodo Jul 22, 2025
a465cbe
merge conflicts
hadia206 Oct 17, 2025
54c88ec
add SF support and test
hadia206 Oct 31, 2025
b55df23
remove unneeded code
hadia206 Oct 31, 2025
420cb5c
[run all] update test 6
hadia206 Nov 3, 2025
b6e11bb
[run all] docs
hadia206 Nov 3, 2025
b7a252f
[run all] missed file
hadia206 Nov 3, 2025
5145d4e
[run all] merge conflict
hadia206 Nov 3, 2025
b1e151b
[run all] docs, remove ansi/sqlite files, fix quoted check
hadia206 Nov 3, 2025
136d526
[run all] fix AST print
hadia206 Nov 3, 2025
40c7228
[run all] one more try
hadia206 Nov 3, 2025
3b6463d
replace base with not implemented error
hadia206 Nov 5, 2025
46daebe
address John comments
hadia206 Nov 6, 2025
0ae47e7
[run all] address Kian comments
hadia206 Nov 6, 2025
7c08883
Merge branch 'main' of https://github.com/bodo-ai/PyDough into Hadia/…
hadia206 Nov 7, 2025
1fb9c44
[run all]
hadia206 Nov 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pydough/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"get_logger",
"init_pydough_context",
"parse_json_metadata_from_file",
"range_collection",
"to_df",
"to_sql",
]
Expand All @@ -22,6 +23,7 @@
from .logger import get_logger
from .metadata import parse_json_metadata_from_file
from .unqualified import display_raw, from_string, init_pydough_context
from .user_collections.user_collection_apis import range_collection

# Create a default session for the user to interact with.
# In most situations users will just use this session and
Expand Down
3 changes: 2 additions & 1 deletion pydough/conversion/agg_removal.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
CallExpression,
EmptySingleton,
Filter,
GeneratedTable,
Join,
JoinType,
Limit,
Expand Down Expand Up @@ -276,7 +277,7 @@ def aggregation_uniqueness_helper(
)
return node, final_uniqueness
# Empty singletons and generated tables don't have uniqueness information.
case EmptySingleton():
case EmptySingleton() | GeneratedTable():
return node, set()
case _:
raise NotImplementedError(
Expand Down
6 changes: 6 additions & 0 deletions pydough/conversion/filter_pushdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
ColumnReference,
EmptySingleton,
Filter,
GeneratedTable,
Join,
JoinCardinality,
JoinType,
Expand Down Expand Up @@ -306,6 +307,11 @@ def visit_empty_singleton(self, empty_singleton: EmptySingleton) -> RelationalNo
# cannot be pushed down any further.
return self.flush_remaining_filters(empty_singleton, self.filters, set())

def visit_generated_table(self, generated_table: GeneratedTable) -> RelationalNode:
    """
    Visits a user-generated table node during filter pushdown.

    Args:
        `generated_table`: the generated table node being visited.

    Returns:
        The node produced by flushing the currently accumulated filters
        on top of `generated_table`.
    """
    # Filters cannot be pushed any further down than a user-generated
    # table, so every filter collected so far is materialized here.
    flushed: RelationalNode = self.flush_remaining_filters(
        generated_table, self.filters, set()
    )
    return flushed


def push_filters(node: RelationalNode, session: PyDoughSession) -> RelationalNode:
"""
Expand Down
35 changes: 35 additions & 0 deletions pydough/conversion/hybrid_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"HybridPartition",
"HybridPartitionChild",
"HybridRoot",
"HybridUserGeneratedCollection",
]


Expand All @@ -27,6 +28,9 @@
ColumnProperty,
PyDoughExpressionQDAG,
)
from pydough.qdag.collections.user_collection_qdag import (
PyDoughUserGeneratedCollectionQDag,
)

from .hybrid_connection import HybridConnection
from .hybrid_expressions import (
Expand Down Expand Up @@ -483,3 +487,34 @@ def __repr__(self):

def search_term_definition(self, name: str) -> HybridExpr | None:
return self.predecessor.search_term_definition(name)


class HybridUserGeneratedCollection(HybridOperation):
    """
    HybridOperation that wraps a user-generated collection QDAG node.
    """

    def __init__(self, user_collection: PyDoughUserGeneratedCollectionQDag):
        """
        Args:
            `user_collection`: the QDAG node for the user-generated
            collection.
        """
        self._user_collection: PyDoughUserGeneratedCollectionQDag = user_collection
        # One reference expression per column exposed by the underlying
        # collection, keyed by the column name.
        column_terms: dict[str, HybridExpr] = {
            col_name: HybridRefExpr(col_name, col_type)
            for col_name, col_type in user_collection.collection.column_names_and_types
        }
        # The unique terms are visited in order of their string form so
        # that the resulting list is built in a deterministic order.
        uniqueness_refs: list[HybridExpr] = [
            HybridRefExpr(
                term_name, self.user_collection.get_expr(term_name).pydough_type
            )
            for term_name in sorted(self.user_collection.unique_terms, key=str)
        ]
        super().__init__(column_terms, {}, [], uniqueness_refs)

    @property
    def user_collection(self) -> PyDoughUserGeneratedCollectionQDag:
        """
        The QDAG node of the user-generated collection wrapped by this
        hybrid operation.
        """
        return self._user_collection

    def __repr__(self) -> str:
        # Delegates to the string form of the wrapped QDAG node.
        wrapped: PyDoughUserGeneratedCollectionQDag = self.user_collection
        return wrapped.to_string()
30 changes: 30 additions & 0 deletions pydough/conversion/hybrid_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
Where,
WindowCall,
)
from pydough.qdag.collections.user_collection_qdag import (
PyDoughUserGeneratedCollectionQDag,
)
from pydough.types import BooleanType, NumericType

from .hybrid_connection import ConnectionType, HybridConnection
Expand Down Expand Up @@ -71,6 +74,7 @@
HybridPartition,
HybridPartitionChild,
HybridRoot,
HybridUserGeneratedCollection,
)
from .hybrid_syncretizer import HybridSyncretizer
from .hybrid_tree import HybridTree
Expand Down Expand Up @@ -1308,6 +1312,9 @@ def define_root_link(
case HybridRoot():
# A root does not need to be joined to its parent
join_keys = []
case HybridUserGeneratedCollection():
# A user-generated collection does not need to be joined to its parent
join_keys = []
case _:
raise NotImplementedError(f"{operation.__class__.__name__}")
if join_keys is not None:
Expand Down Expand Up @@ -1533,6 +1540,18 @@ def make_hybrid_tree(
HybridLimit(hybrid.pipeline[-1], node.records_to_keep)
)
return hybrid
case PyDoughUserGeneratedCollectionQDag():
# A user-generated collection is a special case of a collection
# access that is not a sub-collection, but rather a user-defined
# collection that is defined in the PyDough user collections.
hybrid_collection = HybridUserGeneratedCollection(node)
# Create a new hybrid tree for the user-generated collection.
successor_hybrid = HybridTree(hybrid_collection, node.ancestral_mapping)
hybrid = self.make_hybrid_tree(
node.ancestor_context, parent, is_aggregate
)
hybrid.add_successor(successor_hybrid)
return successor_hybrid
case ChildOperatorChildAccess():
assert parent is not None
match node.child_access:
Expand Down Expand Up @@ -1605,6 +1624,17 @@ def make_hybrid_tree(
successor_hybrid = HybridTree(
HybridRoot(), node.ancestral_mapping
)
case PyDoughUserGeneratedCollectionQDag():
# A user-generated collection is a special case of a collection
# access that is not a sub-collection, but rather a user-defined
# collection that is defined in the PyDough user collections.
hybrid_collection = HybridUserGeneratedCollection(
node.child_access
)
# Create a new hybrid tree for the user-generated collection.
successor_hybrid = HybridTree(
hybrid_collection, node.ancestral_mapping
)
case _:
raise NotImplementedError(
f"{node.__class__.__name__} (child is {node.child_access.__class__.__name__})"
Expand Down
5 changes: 5 additions & 0 deletions pydough/conversion/hybrid_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
HybridPartition,
HybridPartitionChild,
HybridRoot,
HybridUserGeneratedCollection,
)


Expand Down Expand Up @@ -792,6 +793,8 @@ def always_exists(self) -> bool:
# Stepping into a partition child always has a matching data
# record for each parent, by definition.
pass
case HybridUserGeneratedCollection():
return start_operation.user_collection.collection.always_exists()
case _:
raise NotImplementedError(
f"Invalid start of pipeline: {start_operation.__class__.__name__}"
Expand Down Expand Up @@ -842,6 +845,8 @@ def is_singular(self) -> bool:
case HybridChildPullUp():
if not self.children[self.pipeline[0].child_idx].subtree.is_singular():
return False
case HybridUserGeneratedCollection():
return self.pipeline[0].user_collection.collection.is_singular()
case HybridRoot():
pass
case _:
Expand Down
38 changes: 38 additions & 0 deletions pydough/conversion/relational_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
EmptySingleton,
ExpressionSortInfo,
Filter,
GeneratedTable,
Join,
JoinCardinality,
JoinType,
Expand Down Expand Up @@ -82,6 +83,7 @@
HybridPartition,
HybridPartitionChild,
HybridRoot,
HybridUserGeneratedCollection,
)
from .hybrid_translator import HybridTranslator
from .hybrid_tree import HybridTree
Expand Down Expand Up @@ -1267,6 +1269,29 @@ def translate_hybridroot(self, context: TranslationOutput) -> TranslationOutput:
new_expressions[shifted_expr] = column_ref
return TranslationOutput(context.relational_node, new_expressions)

def build_user_generated_table(
    self, node: HybridUserGeneratedCollection
) -> TranslationOutput:
    """
    Builds a user-generated table from the given hybrid user-generated
    collection.

    Args:
        `node`: The user-generated collection node to translate.

    Returns:
        The translated output payload: a `GeneratedTable` built from the
        node's underlying collection, together with a mapping from a
        hybrid reference for each of the collection's columns to a column
        reference with the same name and type.
    """
    # Use the public `user_collection` property instead of reaching into
    # the private `_user_collection` attribute.
    collection = node.user_collection.collection
    # Map each column's hybrid reference to its relational column
    # reference. The previously built name -> column dictionary was never
    # read, so it has been dropped.
    out_columns: dict[HybridExpr, ColumnReference] = {
        HybridRefExpr(column_name, column_type): ColumnReference(
            column_name, column_type
        )
        for column_name, column_type in collection.column_names_and_types
    }
    return TranslationOutput(GeneratedTable(collection), out_columns)

def rel_translation(
self,
hybrid: HybridTree,
Expand Down Expand Up @@ -1395,6 +1420,19 @@ def rel_translation(
case HybridRoot():
assert context is not None, "Malformed HybridTree pattern."
result = self.translate_hybridroot(context)
case HybridUserGeneratedCollection():
assert context is not None, "Malformed HybridTree pattern."
result = self.build_user_generated_table(operation)
result = self.join_outputs(
context,
result,
JoinType.INNER,
JoinCardinality.PLURAL_ACCESS,
JoinCardinality.SINGULAR_ACCESS,
[],
None,
None,
)
case _:
raise NotImplementedError(
f"TODO: support relational conversion on {operation.__class__.__name__}"
Expand Down
7 changes: 7 additions & 0 deletions pydough/conversion/relational_simplification.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
CorrelatedReference,
EmptySingleton,
Filter,
GeneratedTable,
Join,
JoinType,
Limit,
Expand Down Expand Up @@ -1552,6 +1553,12 @@ def visit_empty_singleton(self, node: EmptySingleton) -> None:
)
self.stack.append(output_predicates)

def visit_generated_table(self, node: GeneratedTable) -> None:
    """
    Visits a generated table node: runs the generic visit on it and pushes
    the resulting predicate mapping onto the stack.

    Args:
        `node`: the generated table node being visited.
    """
    # The generic visit runs first; only afterwards is the stack touched.
    predicates: dict[RelationalExpression, PredicateSet] = self.generic_visit(node)
    self.stack.append(predicates)

def visit_project(self, node: Project) -> None:
output_predicates: dict[RelationalExpression, PredicateSet] = (
self.generic_visit(node)
Expand Down
1 change: 1 addition & 0 deletions pydough/qdag/collections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"TableCollection",
"TopK",
"Where",
"range_collection",
]

from .augmenting_child_operator import AugmentingChildOperator
Expand Down
Loading