diff --git a/pydough/conversion/column_bubbler.py b/pydough/conversion/column_bubbler.py index 4dc7cb989..880c79da9 100644 --- a/pydough/conversion/column_bubbler.py +++ b/pydough/conversion/column_bubbler.py @@ -76,7 +76,14 @@ def generate_cleaner_names(expr: RelationalExpression, current_name: str) -> lis if len(expr.inputs) == 1: input_expr = expr.inputs[0] if isinstance(input_expr, ColumnReference): - result.append(f"{expr.op.function_name.lower()}_{input_expr.name}") + input_name: str = input_expr.name + # Keep only ASCII letters, digits, and underscores so the generated name + # is free of quoting characters and other symbols. + input_name = re.sub(r"[^a-zA-Z0-9_]", "", input_name) + cleaner_name: str = f"{expr.op.function_name.lower()}_{input_name}" + + result.append(cleaner_name) + if len(expr.inputs) == 0 and expr.op.function_name.lower() == "count": result.append("n_rows") diff --git a/tests/test_metadata/keywords_graph.json b/tests/test_metadata/keywords_graph.json index 24b2ca5a0..e66c6fa96 100644 --- a/tests/test_metadata/keywords_graph.json +++ b/tests/test_metadata/keywords_graph.json @@ -126,7 +126,7 @@ { "name": "length", "type": "table column", - "column name": "LENGTH", + "column name": "\"LENGTH\"", "data type": "numeric", "description": "Column name normalized from PyDough reserved word 'LENGTH'", "sample values": [ diff --git a/tests/test_pipeline_custom_datasets.py b/tests/test_pipeline_custom_datasets.py index 4ed2ba80d..378b45c57 100644 --- a/tests/test_pipeline_custom_datasets.py +++ b/tests/test_pipeline_custom_datasets.py @@ -200,19 +200,28 @@ PyDoughPandasTest( """ result = keywords.CALCULATE( - max_len=MAX(partition_.integer) -).calculate_.WHERE( - where_ == max_len -).CALCULATE(key=where_, len=length) + max_where=MAX(partition_.where_), + min_quote=MIN(quoted_table_name.quote_), + max_name=MAX(quoted_table_name.name), + count_cast=COUNT(quoted_table_name.cast_), + quote_avg=AVG(quoted_table_name.quote_), + sum_name=SUM(quoted_table_name.name), +) """, "keywords", - lambda: 
pd.DataFrame({"key": [3], "len": [7]}), - "keywords_function_quoted_name", - ), - id="keywords_function_quoted_name", - marks=pytest.mark.skip( - "FIX: (issue #458): Invalid composed SQL alias where column_name is quoted." + lambda: pd.DataFrame( + { + "max_where": [5], + "min_quote": [1], + "max_name": [11], + "count_cast": [5], + "quote_avg": [4.2], + "sum_name": [35], + } + ), + "keywords_expr_call_quoted_names", ), + id="keywords_expr_call_quoted_names", ), ], ) diff --git a/tests/test_plan_refsols/keywords_expr_call_quoted_names.txt b/tests/test_plan_refsols/keywords_expr_call_quoted_names.txt new file mode 100644 index 000000000..169e22ce9 --- /dev/null +++ b/tests/test_plan_refsols/keywords_expr_call_quoted_names.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('max_where', max_WHERE), ('min_quote', min_QUOTE), ('max_name', max_name), ('count_cast', count_cast), ('quote_avg', avg_QUOTE), ('sum_name', DEFAULT_TO(sum_name, 0:numeric))], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_QUOTE': t1.avg_QUOTE, 'count_cast': t1.count_cast, 'max_WHERE': t0.max_WHERE, 'max_name': t1.max_name, 'min_QUOTE': t1.min_QUOTE, 'sum_name': t1.sum_name}) + AGGREGATE(keys={}, aggregations={'max_WHERE': MAX("WHERE")}) + SCAN(table=keywords."PARTITION", columns={'"WHERE"': "WHERE"}) + AGGREGATE(keys={}, aggregations={'avg_QUOTE': AVG(`= "QUOTE"`), 'count_cast': COUNT(```cast```), 'max_name': MAX("`name""["), 'min_QUOTE': MIN(`= "QUOTE"`), 'sum_name': SUM("`name""[")}) + SCAN(table=keywords."""QUOTED TABLE_NAME""", columns={'"`name""["': "`name""[", '`= "QUOTE"`': `= "QUOTE"`, '```cast```': ```cast```}) diff --git a/tests/test_sql_refsols/keywords_expr_call_quoted_names_ansi.sql b/tests/test_sql_refsols/keywords_expr_call_quoted_names_ansi.sql new file mode 100644 index 000000000..e87838610 --- /dev/null +++ b/tests/test_sql_refsols/keywords_expr_call_quoted_names_ansi.sql @@ -0,0 +1,22 @@ +WITH _s0 AS ( + 
SELECT + MAX("WHERE") AS max_where + FROM keywords."PARTITION" +), _s1 AS ( + SELECT + AVG("= ""QUOTE""") AS avg_quote, + COUNT("`cast`") AS count_cast, + MAX("`name""[") AS max_name, + MIN("= ""QUOTE""") AS min_quote, + SUM("`name""[") AS sum_name + FROM keywords."""QUOTED TABLE_NAME""" +) +SELECT + _s0.max_where, + _s1.min_quote, + _s1.max_name, + _s1.count_cast, + _s1.avg_quote AS quote_avg, + COALESCE(_s1.sum_name, 0) AS sum_name +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/keywords_expr_call_quoted_names_mysql.sql b/tests/test_sql_refsols/keywords_expr_call_quoted_names_mysql.sql new file mode 100644 index 000000000..b8759dc49 --- /dev/null +++ b/tests/test_sql_refsols/keywords_expr_call_quoted_names_mysql.sql @@ -0,0 +1,22 @@ +WITH _s0 AS ( + SELECT + MAX(`WHERE`) AS max_WHERE + FROM keywords.`PARTITION` +), _s1 AS ( + SELECT + AVG(`= "QUOTE"`) AS avg_QUOTE, + COUNT(```cast```) AS count_cast, + MAX(```name"[`) AS max_name, + MIN(`= "QUOTE"`) AS min_QUOTE, + SUM(```name"[`) AS sum_name + FROM keywords.`"QUOTED TABLE_NAME"` +) +SELECT + _s0.max_WHERE AS max_where, + _s1.min_QUOTE AS min_quote, + _s1.max_name, + _s1.count_cast, + _s1.avg_QUOTE AS quote_avg, + COALESCE(_s1.sum_name, 0) AS sum_name +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/keywords_expr_call_quoted_names_postgres.sql b/tests/test_sql_refsols/keywords_expr_call_quoted_names_postgres.sql new file mode 100644 index 000000000..253946776 --- /dev/null +++ b/tests/test_sql_refsols/keywords_expr_call_quoted_names_postgres.sql @@ -0,0 +1,22 @@ +WITH _s0 AS ( + SELECT + MAX("WHERE") AS max_where + FROM keywords."PARTITION" +), _s1 AS ( + SELECT + AVG(CAST("= ""QUOTE""" AS DECIMAL)) AS avg_quote, + COUNT("`cast`") AS count_cast, + MAX("`name""[") AS max_name, + MIN("= ""QUOTE""") AS min_quote, + SUM("`name""[") AS sum_name + FROM keywords."""QUOTED TABLE_NAME""" +) +SELECT + _s0.max_where, + _s1.min_quote, + _s1.max_name, + _s1.count_cast, + 
_s1.avg_quote AS quote_avg, + COALESCE(_s1.sum_name, 0) AS sum_name +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/keywords_expr_call_quoted_names_snowflake.sql b/tests/test_sql_refsols/keywords_expr_call_quoted_names_snowflake.sql new file mode 100644 index 000000000..e87838610 --- /dev/null +++ b/tests/test_sql_refsols/keywords_expr_call_quoted_names_snowflake.sql @@ -0,0 +1,22 @@ +WITH _s0 AS ( + SELECT + MAX("WHERE") AS max_where + FROM keywords."PARTITION" +), _s1 AS ( + SELECT + AVG("= ""QUOTE""") AS avg_quote, + COUNT("`cast`") AS count_cast, + MAX("`name""[") AS max_name, + MIN("= ""QUOTE""") AS min_quote, + SUM("`name""[") AS sum_name + FROM keywords."""QUOTED TABLE_NAME""" +) +SELECT + _s0.max_where, + _s1.min_quote, + _s1.max_name, + _s1.count_cast, + _s1.avg_quote AS quote_avg, + COALESCE(_s1.sum_name, 0) AS sum_name +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/keywords_expr_call_quoted_names_sqlite.sql b/tests/test_sql_refsols/keywords_expr_call_quoted_names_sqlite.sql new file mode 100644 index 000000000..cf0e22681 --- /dev/null +++ b/tests/test_sql_refsols/keywords_expr_call_quoted_names_sqlite.sql @@ -0,0 +1,22 @@ +WITH _s0 AS ( + SELECT + MAX("where") AS max_where + FROM keywords."partition" +), _s1 AS ( + SELECT + AVG("= ""quote""") AS avg_quote, + COUNT("`cast`") AS count_cast, + MAX("`name""[") AS max_name, + MIN("= ""quote""") AS min_quote, + SUM("`name""[") AS sum_name + FROM keywords."""quoted table_name""" +) +SELECT + _s0.max_where, + _s1.min_quote, + _s1.max_name, + _s1.count_cast, + _s1.avg_quote AS quote_avg, + COALESCE(_s1.sum_name, 0) AS sum_name +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1