Skip to content

Commit 87eec43

Browse files
authored
fix: compound_field_access doesn't identifier qualifier. (#15153)
* fix: subscript * format * refactor with jonahgao's suggestion * move test to slt
1 parent d7c70e4 commit 87eec43

File tree

2 files changed

+114
-41
lines changed

2 files changed

+114
-41
lines changed

datafusion/sql/src/expr/mod.rs

Lines changed: 77 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ use sqlparser::ast::{
2626
};
2727

2828
use datafusion_common::{
29-
internal_datafusion_err, internal_err, not_impl_err, plan_err, Column, DFSchema,
30-
Result, ScalarValue,
29+
internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result,
30+
ScalarValue,
3131
};
3232

3333
use datafusion_expr::expr::ScalarFunction;
@@ -983,14 +983,77 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
983983
Ok(Expr::Cast(Cast::new(Box::new(expr), dt)))
984984
}
985985

986+
/// Extracts the root expression and access chain from a compound expression.
987+
///
988+
/// This function attempts to identify if a compound expression (like `a.b.c`) should be treated
989+
/// as a column reference with a qualifier (like `table.column`) or as a field access expression.
990+
///
991+
/// # Arguments
992+
///
993+
/// * `root` - The root SQL expression (e.g., the first part of `a.b.c`)
994+
/// * `access_chain` - Vector of access expressions (e.g., `.b` and `.c` parts)
995+
/// * `schema` - The schema to resolve column references against
996+
/// * `planner_context` - Context for planning expressions
997+
///
998+
/// # Returns
999+
///
1000+
/// A tuple containing:
1001+
/// * The resolved root expression
1002+
/// * The remaining access chain that should be processed as field accesses
1003+
fn extract_root_and_access_chain(
1004+
&self,
1005+
root: SQLExpr,
1006+
mut access_chain: Vec<AccessExpr>,
1007+
schema: &DFSchema,
1008+
planner_context: &mut PlannerContext,
1009+
) -> Result<(Expr, Vec<AccessExpr>)> {
1010+
let SQLExpr::Identifier(root_ident) = root else {
1011+
let root = self.sql_expr_to_logical_expr(root, schema, planner_context)?;
1012+
return Ok((root, access_chain));
1013+
};
1014+
1015+
let mut compound_idents = vec![root_ident];
1016+
let first_non_ident = access_chain
1017+
.iter()
1018+
.position(|access| !matches!(access, AccessExpr::Dot(SQLExpr::Identifier(_))))
1019+
.unwrap_or(access_chain.len());
1020+
for access in access_chain.drain(0..first_non_ident) {
1021+
if let AccessExpr::Dot(SQLExpr::Identifier(ident)) = access {
1022+
compound_idents.push(ident);
1023+
} else {
1024+
return internal_err!("Expected identifier in access chain");
1025+
}
1026+
}
1027+
1028+
let root = if compound_idents.len() == 1 {
1029+
self.sql_identifier_to_expr(
1030+
compound_idents.pop().unwrap(),
1031+
schema,
1032+
planner_context,
1033+
)?
1034+
} else {
1035+
self.sql_compound_identifier_to_expr(
1036+
compound_idents,
1037+
schema,
1038+
planner_context,
1039+
)?
1040+
};
1041+
Ok((root, access_chain))
1042+
}
1043+
9861044
fn sql_compound_field_access_to_expr(
9871045
&self,
9881046
root: SQLExpr,
9891047
access_chain: Vec<AccessExpr>,
9901048
schema: &DFSchema,
9911049
planner_context: &mut PlannerContext,
9921050
) -> Result<Expr> {
993-
let mut root = self.sql_expr_to_logical_expr(root, schema, planner_context)?;
1051+
let (root, access_chain) = self.extract_root_and_access_chain(
1052+
root,
1053+
access_chain,
1054+
schema,
1055+
planner_context,
1056+
)?;
9941057
let fields = access_chain
9951058
.into_iter()
9961059
.map(|field| match field {
@@ -1064,45 +1127,18 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
10641127
}
10651128
}
10661129
}
1067-
AccessExpr::Dot(expr) => {
1068-
let expr =
1069-
self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
1070-
match expr {
1071-
Expr::Column(Column {
1072-
name,
1073-
relation,
1074-
spans,
1075-
}) => {
1076-
if let Some(relation) = &relation {
1077-
// If the first part of the dot access is a column reference, we should
1078-
// check if the column is from the same table as the root expression.
1079-
// If it is, we should replace the root expression with the column reference.
1080-
// Otherwise, we should treat the dot access as a named field access.
1081-
if relation.table() == root.schema_name().to_string() {
1082-
root = Expr::Column(Column {
1083-
name,
1084-
relation: Some(relation.clone()),
1085-
spans,
1086-
});
1087-
Ok(None)
1088-
} else {
1089-
plan_err!(
1090-
"table name mismatch: {} != {}",
1091-
relation.table(),
1092-
root.schema_name()
1093-
)
1094-
}
1095-
} else {
1096-
Ok(Some(GetFieldAccess::NamedStructField {
1097-
name: ScalarValue::from(name),
1098-
}))
1099-
}
1100-
}
1101-
_ => not_impl_err!(
1102-
"Dot access not supported for non-column expr: {expr:?}"
1103-
),
1130+
AccessExpr::Dot(expr) => match expr {
1131+
SQLExpr::Value(
1132+
Value::SingleQuotedString(s) | Value::DoubleQuotedString(s),
1133+
) => Ok(Some(GetFieldAccess::NamedStructField {
1134+
name: ScalarValue::from(s),
1135+
})),
1136+
_ => {
1137+
not_impl_err!(
1138+
"Dot access not supported for non-string expr: {expr:?}"
1139+
)
11041140
}
1105-
}
1141+
},
11061142
})
11071143
.collect::<Result<Vec<_>>>()?;
11081144

datafusion/sqllogictest/test_files/struct.slt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,3 +595,40 @@ Struct([Field { name: "r", data_type: Utf8, nullable: true, dict_id: 0, dict_is_
595595

596596
statement ok
597597
drop table t;
598+
599+
600+
# Test struct field access with subscript notation
601+
# This tests accessing struct fields using the subscript notation with string literals
602+
603+
statement ok
604+
create table test (struct_field struct(substruct int)) as values (struct(1));
605+
606+
query ??
607+
select *
608+
from test as test1, test as test2 where
609+
test1.struct_field['substruct'] = test2.struct_field['substruct'];
610+
----
611+
{substruct: 1} {substruct: 1}
612+
613+
statement ok
614+
DROP TABLE test;
615+
616+
statement ok
617+
create table test (struct_field struct(substruct struct(subsubstruct int))) as values (struct(struct(1)));
618+
619+
query ??
620+
select *
621+
from test as test1, test as test2 where
622+
test1.struct_field.substruct['subsubstruct'] = test2.struct_field.substruct['subsubstruct'];
623+
----
624+
{substruct: {subsubstruct: 1}} {substruct: {subsubstruct: 1}}
625+
626+
query ??
627+
select *
628+
from test AS test1, test AS test2 where
629+
test1.struct_field['substruct']['subsubstruct'] = test2.struct_field['substruct']['subsubstruct'];
630+
----
631+
{substruct: {subsubstruct: 1}} {substruct: {subsubstruct: 1}}
632+
633+
statement ok
634+
drop table test;

0 commit comments

Comments
 (0)