Skip to content

Commit

Permalink
[C++]: support casting nullable fields to non-nullable if there are no null values
Browse files Browse the repository at this point in the history

Fixes #33592
  • Loading branch information
NickCrews committed Feb 11, 2025
1 parent 1b45e35 commit 75dd13b
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 24 deletions.
29 changes: 17 additions & 12 deletions cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -363,10 +363,6 @@ struct CastStruct {
const auto& in_field = in_type.field(in_field_index);
// If there are more in_fields check if they match the out_field.
if (in_field->name() == out_field->name()) {
if (in_field->nullable() && !out_field->nullable()) {
return Status::TypeError("cannot cast nullable field to non-nullable field: ",
in_type.ToString(), " ", out_type.ToString());
}
// Found matching in_field and out_field.
fields_to_select[out_field_index++] = in_field_index;
// Using the same in_field for multiple out_fields is not allowed.
Expand Down Expand Up @@ -403,17 +399,26 @@ struct CastStruct {
}

int out_field_index = 0;
for (int field_index : fields_to_select) {
const auto& target_type = out->type()->field(out_field_index++)->type();
if (field_index == kFillNullSentinel) {
ARROW_ASSIGN_OR_RAISE(auto nulls,
MakeArrayOfNull(target_type->GetSharedPtr(), batch.length));
for (int in_field_index : fields_to_select) {
const auto& out_field = out_type.field(out_field_index++);
const auto& out_field_type = out_field->type();
if (in_field_index == kFillNullSentinel) {
ARROW_ASSIGN_OR_RAISE(
auto nulls, MakeArrayOfNull(out_field_type->GetSharedPtr(), batch.length));
out_array->child_data.push_back(nulls->data());
} else {
const auto& values = (in_array.child_data[field_index].ToArrayData()->Slice(
const auto& in_field = in_type.field(in_field_index);
const auto& in_values = (in_array.child_data[in_field_index].ToArrayData()->Slice(
in_array.offset, in_array.length));
ARROW_ASSIGN_OR_RAISE(Datum cast_values,
Cast(values, target_type, options, ctx->exec_context()));
if (in_field->nullable() && !out_field->nullable() &&
in_values->GetNullCount() > 0) {
return Status::Invalid(
"field '", in_field->name(), "' of type ", in_field->type()->ToString(),
" has nulls. Can't cast to non-nullable field '", out_field->name(),
"' of type ", out_field_type->ToString());
}
ARROW_ASSIGN_OR_RAISE(Datum cast_values, Cast(in_values, out_field_type, options,
ctx->exec_context()));
DCHECK(cast_values.is_array());
out_array->child_data.push_back(cast_values.array());
}
Expand Down
29 changes: 17 additions & 12 deletions cpp/src/arrow/compute/kernels/scalar_cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4119,7 +4119,7 @@ TEST(Cast, StructToDifferentNullabilityStruct) {
CheckCast(src_non_nullable, dest3_nullable);
}
{
// But NOT OK to go from nullable to non-nullable...
// But when going from nullable to non-nullable, all data must be non-null...
std::vector<std::shared_ptr<Field>> fields_src_nullable = {
std::make_shared<Field>("a", int8(), true),
std::make_shared<Field>("b", int8(), true),
Expand All @@ -4140,8 +4140,10 @@ TEST(Cast, StructToDifferentNullabilityStruct) {
const auto dest1_non_nullable = arrow::struct_(fields_dest1_non_nullable);
const auto options1_non_nullable = CastOptions::Safe(dest1_non_nullable);
EXPECT_RAISES_WITH_MESSAGE_THAT(
TypeError,
::testing::HasSubstr("cannot cast nullable field to non-nullable field"),
Invalid,
::testing::HasSubstr(
"field 'a' of type int8 has nulls. Can't cast to non-nullable field 'a' "
"of type int64"),
Cast(src_nullable, options1_non_nullable));

std::vector<std::shared_ptr<Field>> fields_dest2_non_nullable = {
Expand All @@ -4150,18 +4152,21 @@ TEST(Cast, StructToDifferentNullabilityStruct) {
const auto dest2_non_nullable = arrow::struct_(fields_dest2_non_nullable);
const auto options2_non_nullable = CastOptions::Safe(dest2_non_nullable);
EXPECT_RAISES_WITH_MESSAGE_THAT(
TypeError,
::testing::HasSubstr("cannot cast nullable field to non-nullable field"),
Invalid,
::testing::HasSubstr(
"field 'a' of type int8 has nulls. Can't cast to non-nullable field 'a' "
"of type int64"),
Cast(src_nullable, options2_non_nullable));

std::vector<std::shared_ptr<Field>> fields_dest3_non_nullable = {
std::shared_ptr<Array> c_dest_no_nulls;
c_dest_no_nulls = ArrayFromJSON(int64(), "[9, 11, 44]");
std::vector<std::shared_ptr<Field>> fields_dest_no_nulls = {
std::make_shared<Field>("c", int64(), false)};
const auto dest3_non_nullable = arrow::struct_(fields_dest3_non_nullable);
const auto options3_non_nullable = CastOptions::Safe(dest3_non_nullable);
EXPECT_RAISES_WITH_MESSAGE_THAT(
TypeError,
::testing::HasSubstr("cannot cast nullable field to non-nullable field"),
Cast(src_nullable, options3_non_nullable));
ASSERT_OK_AND_ASSIGN(auto dest_no_nulls,
StructArray::Make({c_dest_no_nulls}, fields_dest_no_nulls));
const auto options3_non_nullable =
CastOptions::Safe(arrow::struct_(fields_dest_no_nulls));
CheckCast(src_nullable, dest_no_nulls, options3_non_nullable);
}
}

Expand Down

0 comments on commit 75dd13b

Please sign in to comment.