Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
ed53c99
WIP: downloading from s3
john-sanchez31 Nov 7, 2025
a05fab6
testing infrastructure for s3 custom datasets [run ci]
john-sanchez31 Nov 11, 2025
fddceff
flag run custom for custom dataset tests created
john-sanchez31 Nov 12, 2025
26d039c
adding all connectors for custom test
john-sanchez31 Nov 12, 2025
cd894dd
synthea s3 testing, adding bug test [run custom]
john-sanchez31 Nov 12, 2025
7b1f1f0
excluding custom test for [run ci] [run custom]
john-sanchez31 Nov 13, 2025
f31176e
Fix for call expression alias bug with quoted column names
john-sanchez31 Nov 14, 2025
ad86371
quoting the entire alias, adding a test for each function [run all]
john-sanchez31 Nov 14, 2025
277aea7
now dialect flag does not run custom tests
john-sanchez31 Nov 14, 2025
4fc6a2f
testing [run all]
john-sanchez31 Nov 14, 2025
dce5428
adding mark and secrets for custom tests [run all]
john-sanchez31 Nov 14, 2025
1af103d
set env for custom dataset [run all]
john-sanchez31 Nov 14, 2025
2ee3a34
custom ci separated [run all]
john-sanchez31 Nov 17, 2025
5680d1f
secret name fixed [run all]
john-sanchez31 Nov 17, 2025
85b4e58
Merge branch 'John/s3_testing' into John/callexp_alias_patch
john-sanchez31 Nov 17, 2025
c618f81
testing [run all]
john-sanchez31 Nov 17, 2025
78fb6e5
comments addressed [run all]
john-sanchez31 Nov 17, 2025
bd7dd0d
using input_name [run all]
john-sanchez31 Nov 17, 2025
447fcb4
keeping the db files
john-sanchez31 Nov 19, 2025
654b105
custom and s3 datasets separated
john-sanchez31 Nov 19, 2025
cf2aac2
s3 flag created [run s3]
john-sanchez31 Nov 19, 2025
f116a22
conflicts solved [run s3]
john-sanchez31 Nov 19, 2025
10b2e23
testing [run all]
john-sanchez31 Nov 19, 2025
68e2139
fixture added [run all]
john-sanchez31 Nov 19, 2025
47fef7f
testing [run ci]
john-sanchez31 Nov 19, 2025
25e1474
no initialized db fixed [run all]
john-sanchez31 Nov 19, 2025
fa6f4d1
init script added [run all]
john-sanchez31 Nov 19, 2025
cea2b25
Merge branch 'John/s3_testing' into John/callexp_alias_patch
john-sanchez31 Nov 20, 2025
2fc522d
removing special chars [run all]
john-sanchez31 Nov 20, 2025
31521a7
testing sf [run all]
john-sanchez31 Nov 20, 2025
e5bf954
sf masked tests updated [run all]
john-sanchez31 Nov 20, 2025
3bc7a8e
test refsol updated [run all]
john-sanchez31 Nov 20, 2025
fc3e38f
undo underscore allowed [run all]
john-sanchez31 Nov 20, 2025
21ce516
conflicts solved
john-sanchez31 Nov 20, 2025
df077b4
testing [run all]
john-sanchez31 Nov 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion pydough/conversion/column_bubbler.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,14 @@ def generate_cleaner_names(expr: RelationalExpression, current_name: str) -> lis
if len(expr.inputs) == 1:
input_expr = expr.inputs[0]
if isinstance(input_expr, ColumnReference):
result.append(f"{expr.op.function_name.lower()}_{input_expr.name}")
input_name: str = input_expr.name
# Strip every character that is not an ASCII letter, digit, or underscore
# (e.g. quotes, backticks, spaces) so the generated name is a clean identifier
input_name = re.sub(r"[^a-zA-Z0-9_]", "", input_name)
cleaner_name: str = f"{expr.op.function_name.lower()}_{input_name}"

result.append(cleaner_name)

if len(expr.inputs) == 0 and expr.op.function_name.lower() == "count":
result.append("n_rows")

Expand Down
2 changes: 1 addition & 1 deletion tests/test_metadata/keywords_graph.json
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@
{
"name": "length",
"type": "table column",
"column name": "LENGTH",
"column name": "\"LENGTH\"",
"data type": "numeric",
"description": "Column name normalized from PyDough reserved word 'LENGTH'",
"sample values": [
Expand Down
29 changes: 19 additions & 10 deletions tests/test_pipeline_custom_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,19 +200,28 @@
PyDoughPandasTest(
"""
result = keywords.CALCULATE(
max_len=MAX(partition_.integer)
).calculate_.WHERE(
where_ == max_len
).CALCULATE(key=where_, len=length)
max_where=MAX(partition_.where_),
min_quote=MIN(quoted_table_name.quote_),
max_name=MAX(quoted_table_name.name),
count_cast=COUNT(quoted_table_name.cast_),
quote_avg=AVG(quoted_table_name.quote_),
sum_name=SUM(quoted_table_name.name),
)
""",
"keywords",
lambda: pd.DataFrame({"key": [3], "len": [7]}),
"keywords_function_quoted_name",
),
id="keywords_function_quoted_name",
marks=pytest.mark.skip(
"FIX: (issue #458): Invalid composed SQL alias where column_name is quoted."
lambda: pd.DataFrame(
{
"max_where": [5],
"min_quote": [1],
"max_name": [11],
"count_cast": [5],
"quote_avg": [4.2],
"sum_name": [35],
}
),
"keywords_expr_call_quoted_names",
),
id="keywords_expr_call_quoted_names",
),
],
)
Expand Down
6 changes: 6 additions & 0 deletions tests/test_plan_refsols/keywords_expr_call_quoted_names.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
ROOT(columns=[('max_where', max_WHERE), ('min_quote', min_QUOTE), ('max_name', max_name), ('count_cast', count_cast), ('quote_avg', avg_QUOTE), ('sum_name', DEFAULT_TO(sum_name, 0:numeric))], orderings=[])
JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_QUOTE': t1.avg_QUOTE, 'count_cast': t1.count_cast, 'max_WHERE': t0.max_WHERE, 'max_name': t1.max_name, 'min_QUOTE': t1.min_QUOTE, 'sum_name': t1.sum_name})
AGGREGATE(keys={}, aggregations={'max_WHERE': MAX("WHERE")})
SCAN(table=keywords."PARTITION", columns={'"WHERE"': "WHERE"})
AGGREGATE(keys={}, aggregations={'avg_QUOTE': AVG(`= "QUOTE"`), 'count_cast': COUNT(```cast```), 'max_name': MAX("`name""["), 'min_QUOTE': MIN(`= "QUOTE"`), 'sum_name': SUM("`name""[")})
SCAN(table=keywords."""QUOTED TABLE_NAME""", columns={'"`name""["': "`name""[", '`= "QUOTE"`': `= "QUOTE"`, '```cast```': ```cast```})
22 changes: 22 additions & 0 deletions tests/test_sql_refsols/keywords_expr_call_quoted_names_ansi.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
WITH _s0 AS (
SELECT
MAX("WHERE") AS max_where
FROM keywords."PARTITION"
), _s1 AS (
SELECT
AVG("= ""QUOTE""") AS avg_quote,
COUNT("`cast`") AS count_cast,
MAX("`name""[") AS max_name,
MIN("= ""QUOTE""") AS min_quote,
SUM("`name""[") AS sum_name
FROM keywords."""QUOTED TABLE_NAME"""
)
SELECT
_s0.max_where,
_s1.min_quote,
_s1.max_name,
_s1.count_cast,
_s1.avg_quote AS quote_avg,
COALESCE(_s1.sum_name, 0) AS sum_name
FROM _s0 AS _s0
CROSS JOIN _s1 AS _s1
22 changes: 22 additions & 0 deletions tests/test_sql_refsols/keywords_expr_call_quoted_names_mysql.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
WITH _s0 AS (
SELECT
MAX(`WHERE`) AS max_WHERE
FROM keywords.`PARTITION`
), _s1 AS (
SELECT
AVG(`= "QUOTE"`) AS avg_QUOTE,
COUNT(```cast```) AS count_cast,
MAX(```name"[`) AS max_name,
MIN(`= "QUOTE"`) AS min_QUOTE,
SUM(```name"[`) AS sum_name
FROM keywords.`"QUOTED TABLE_NAME"`
)
SELECT
_s0.max_WHERE AS max_where,
_s1.min_QUOTE AS min_quote,
_s1.max_name,
_s1.count_cast,
_s1.avg_QUOTE AS quote_avg,
COALESCE(_s1.sum_name, 0) AS sum_name
FROM _s0 AS _s0
CROSS JOIN _s1 AS _s1
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
WITH _s0 AS (
SELECT
MAX("WHERE") AS max_where
FROM keywords."PARTITION"
), _s1 AS (
SELECT
AVG(CAST("= ""QUOTE""" AS DECIMAL)) AS avg_quote,
COUNT("`cast`") AS count_cast,
MAX("`name""[") AS max_name,
MIN("= ""QUOTE""") AS min_quote,
SUM("`name""[") AS sum_name
FROM keywords."""QUOTED TABLE_NAME"""
)
SELECT
_s0.max_where,
_s1.min_quote,
_s1.max_name,
_s1.count_cast,
_s1.avg_quote AS quote_avg,
COALESCE(_s1.sum_name, 0) AS sum_name
FROM _s0 AS _s0
CROSS JOIN _s1 AS _s1
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
WITH _s0 AS (
SELECT
MAX("WHERE") AS max_where
FROM keywords."PARTITION"
), _s1 AS (
SELECT
AVG("= ""QUOTE""") AS avg_quote,
COUNT("`cast`") AS count_cast,
MAX("`name""[") AS max_name,
MIN("= ""QUOTE""") AS min_quote,
SUM("`name""[") AS sum_name
FROM keywords."""QUOTED TABLE_NAME"""
)
SELECT
_s0.max_where,
_s1.min_quote,
_s1.max_name,
_s1.count_cast,
_s1.avg_quote AS quote_avg,
COALESCE(_s1.sum_name, 0) AS sum_name
FROM _s0 AS _s0
CROSS JOIN _s1 AS _s1
22 changes: 22 additions & 0 deletions tests/test_sql_refsols/keywords_expr_call_quoted_names_sqlite.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
WITH _s0 AS (
SELECT
MAX("where") AS max_where
FROM keywords."partition"
), _s1 AS (
SELECT
AVG("= ""quote""") AS avg_quote,
COUNT("`cast`") AS count_cast,
MAX("`name""[") AS max_name,
MIN("= ""quote""") AS min_quote,
SUM("`name""[") AS sum_name
FROM keywords."""quoted table_name"""
)
SELECT
_s0.max_where,
_s1.min_quote,
_s1.max_name,
_s1.count_cast,
_s1.avg_quote AS quote_avg,
COALESCE(_s1.sum_name, 0) AS sum_name
FROM _s0 AS _s0
CROSS JOIN _s1 AS _s1