apache · alamb · Oct 15, 2025 · Oct 8, 2025 · Oct 9, 2025 · Oct 10, 2025
diff --git a/datafusion/physical-expr-common/src/physical_expr.rs b/datafusion/physical-expr-common/src/physical_expr.rs
@@ -23,7 +23,7 @@ use std::sync::Arc;
 
 use crate::utils::scatter;
 
-use arrow::array::{ArrayRef, BooleanArray};
+use arrow::array::{make_builder, ArrayBuilder, ArrayRef, BooleanArray};
 use arrow::compute::filter_record_batch;
 use arrow::datatypes::{DataType, Field, FieldRef, Schema};
 use arrow::record_batch::RecordBatch;
@@ -97,14 +97,26 @@ pub trait PhysicalExpr: Any + Send + Sync + Display + Debug + DynEq + DynHash {
         batch: &RecordBatch,
         selection: &BooleanArray,
     ) -> Result<ColumnarValue> {
-        let tmp_batch = filter_record_batch(batch, selection)?;
+        let selection_count = selection.true_count();
 
-        let tmp_result = self.evaluate(&tmp_batch)?;
+        if selection_count == batch.num_rows() {
+            // Skip filtering logic if possible
+            return self.evaluate(batch);
+        }
 
-        if batch.num_rows() == tmp_batch.num_rows() {
-            // All values from the `selection` filter are true.
-            Ok(tmp_result)
-        } else if let ColumnarValue::Array(a) = tmp_result {
+        let tmp_result = if selection_count == 0 {
+            // Do not call `evaluate` when the selection is empty.
+            // When `evaluate_selection` is being used for conditional, lazy evaluation,
+            // evaluating an expression for a false selection vector may end up unintentionally
+            // evaluating a fallible expression.
+            let datatype = self.data_type(batch.schema_ref().as_ref())?;
+            ColumnarValue::Array(make_builder(&datatype, 0).finish())
+        } else {
+            let filtered_batch = filter_record_batch(batch, selection)?;
+            self.evaluate(&filtered_batch)?
+        };
+
+        if let ColumnarValue::Array(a) = tmp_result {
             scatter(selection, a.as_ref()).map(ColumnarValue::Array)
         } else if let ColumnarValue::Scalar(ScalarValue::Boolean(value)) = &tmp_result {
             // When the scalar is true or false, skip the scatter process

diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs
@@ -155,10 +155,7 @@ impl CaseExpr {
                 && else_expr.as_ref().unwrap().as_any().is::<Literal>()
             {
                 EvalMethod::ScalarOrScalar
-            } else if when_then_expr.len() == 1
-                && is_cheap_and_infallible(&(when_then_expr[0].1))
-                && else_expr.as_ref().is_some_and(is_cheap_and_infallible)
-            {
+            } else if when_then_expr.len() == 1 && else_expr.is_some() {
                 EvalMethod::ExpressionOrExpression
             } else {
                 EvalMethod::NoExpression
@@ -425,6 +422,16 @@ impl CaseExpr {
             )
         })?;
 
+        // If the predicate is true for every row, or for none (all false/null), evaluate only
+        // the matching branch directly, bypassing `evaluate_selection` and result merging. This
+        // avoids materializing an array for the other branch, which would be discarded anyway.
+        let true_count = when_value.true_count();
+        if true_count == batch.num_rows() {
+            return self.when_then_expr[0].1.evaluate(batch);
+        } else if true_count == 0 {
+            return self.else_expr.as_ref().unwrap().evaluate(batch);
+        }
+
         // Treat 'NULL' as false value
         let when_value = match when_value.null_count() {
             0 => Cow::Borrowed(when_value),