Skip to content

Commit eb2aa96

Browse files
sunng87 authored and tobixdev committed
refactor: cleanup naming and macro usages for binary operator (apache#17985)
* refactor: rename macros for clear meaning
* refactor: cleanup and simplify regexp match macros
* refactor: update error messages
1 parent 6126390 commit eb2aa96

File tree

2 files changed

+172
-246
lines changed

2 files changed

+172
-246
lines changed

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 10 additions & 214 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,14 @@
1717

1818
mod kernels;
1919

20-
use crate::expressions::binary::kernels::concat_elements_utf8view;
2120
use crate::intervals::cp_solver::{propagate_arithmetic, propagate_comparison};
2221
use crate::PhysicalExpr;
2322
use std::hash::Hash;
2423
use std::{any::Any, sync::Arc};
2524

2625
use arrow::array::*;
27-
use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene};
26+
use arrow::compute::kernels::boolean::{and_kleene, or_kleene};
2827
use arrow::compute::kernels::cmp::*;
29-
use arrow::compute::kernels::comparison::{regexp_is_match, regexp_is_match_scalar};
3028
use arrow::compute::kernels::concat_elements::concat_elements_utf8;
3129
use arrow::compute::{
3230
cast, filter_record_batch, ilike, like, nilike, nlike, SlicesIterator,
@@ -50,6 +48,7 @@ use kernels::{
5048
bitwise_and_dyn, bitwise_and_dyn_scalar, bitwise_or_dyn, bitwise_or_dyn_scalar,
5149
bitwise_shift_left_dyn, bitwise_shift_left_dyn_scalar, bitwise_shift_right_dyn,
5250
bitwise_shift_right_dyn_scalar, bitwise_xor_dyn, bitwise_xor_dyn_scalar,
51+
concat_elements_utf8view, regex_match_dyn, regex_match_dyn_scalar,
5352
};
5453

5554
/// Binary expression
@@ -166,177 +165,6 @@ fn boolean_op(
166165
op(ll, rr).map(|t| Arc::new(t) as _)
167166
}
168167

169-
macro_rules! binary_string_array_flag_op {
170-
($LEFT:expr, $RIGHT:expr, $OP:ident, $NOT:expr, $FLAG:expr) => {{
171-
match $LEFT.data_type() {
172-
DataType::Utf8 => {
173-
compute_utf8_flag_op!($LEFT, $RIGHT, $OP, StringArray, $NOT, $FLAG)
174-
},
175-
DataType::Utf8View => {
176-
compute_utf8view_flag_op!($LEFT, $RIGHT, $OP, StringViewArray, $NOT, $FLAG)
177-
}
178-
DataType::LargeUtf8 => {
179-
compute_utf8_flag_op!($LEFT, $RIGHT, $OP, LargeStringArray, $NOT, $FLAG)
180-
},
181-
other => internal_err!(
182-
"Data type {} not supported for binary_string_array_flag_op operation '{}' on string array",
183-
other, stringify!($OP)
184-
),
185-
}
186-
}};
187-
}
188-
189-
/// Invoke a compute kernel on a pair of binary data arrays with flags
190-
macro_rules! compute_utf8_flag_op {
191-
($LEFT:expr, $RIGHT:expr, $OP:ident, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
192-
let ll = $LEFT
193-
.as_any()
194-
.downcast_ref::<$ARRAYTYPE>()
195-
.expect("compute_utf8_flag_op failed to downcast array");
196-
let rr = $RIGHT
197-
.as_any()
198-
.downcast_ref::<$ARRAYTYPE>()
199-
.expect("compute_utf8_flag_op failed to downcast array");
200-
201-
let flag = if $FLAG {
202-
Some($ARRAYTYPE::from(vec!["i"; ll.len()]))
203-
} else {
204-
None
205-
};
206-
let mut array = $OP(ll, rr, flag.as_ref())?;
207-
if $NOT {
208-
array = not(&array).unwrap();
209-
}
210-
Ok(Arc::new(array))
211-
}};
212-
}
213-
214-
/// Invoke a compute kernel on a pair of binary data arrays with flags
215-
macro_rules! compute_utf8view_flag_op {
216-
($LEFT:expr, $RIGHT:expr, $OP:ident, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
217-
let ll = $LEFT
218-
.as_any()
219-
.downcast_ref::<$ARRAYTYPE>()
220-
.expect("compute_utf8view_flag_op failed to downcast array");
221-
let rr = $RIGHT
222-
.as_any()
223-
.downcast_ref::<$ARRAYTYPE>()
224-
.expect("compute_utf8view_flag_op failed to downcast array");
225-
226-
let flag = if $FLAG {
227-
Some($ARRAYTYPE::from(vec!["i"; ll.len()]))
228-
} else {
229-
None
230-
};
231-
let mut array = $OP(ll, rr, flag.as_ref())?;
232-
if $NOT {
233-
array = not(&array).unwrap();
234-
}
235-
Ok(Arc::new(array))
236-
}};
237-
}
238-
239-
macro_rules! binary_string_array_flag_op_scalar {
240-
($LEFT:ident, $RIGHT:expr, $OP:ident, $NOT:expr, $FLAG:expr) => {{
241-
// This macro is slightly different from binary_string_array_flag_op because, when comparing with a scalar value,
242-
// the query can be optimized in such a way that operands will be dicts, so we need to support it here
243-
let result: Result<Arc<dyn Array>> = match $LEFT.data_type() {
244-
DataType::Utf8 => {
245-
compute_utf8_flag_op_scalar!($LEFT, $RIGHT, $OP, StringArray, $NOT, $FLAG)
246-
},
247-
DataType::Utf8View => {
248-
compute_utf8view_flag_op_scalar!($LEFT, $RIGHT, $OP, StringViewArray, $NOT, $FLAG)
249-
}
250-
DataType::LargeUtf8 => {
251-
compute_utf8_flag_op_scalar!($LEFT, $RIGHT, $OP, LargeStringArray, $NOT, $FLAG)
252-
},
253-
DataType::Dictionary(_, _) => {
254-
let values = $LEFT.as_any_dictionary().values();
255-
256-
match values.data_type() {
257-
DataType::Utf8 => compute_utf8_flag_op_scalar!(values, $RIGHT, $OP, StringArray, $NOT, $FLAG),
258-
DataType::Utf8View => compute_utf8view_flag_op_scalar!(values, $RIGHT, $OP, StringViewArray, $NOT, $FLAG),
259-
DataType::LargeUtf8 => compute_utf8_flag_op_scalar!(values, $RIGHT, $OP, LargeStringArray, $NOT, $FLAG),
260-
other => internal_err!(
261-
"Data type {} not supported as a dictionary value type for binary_string_array_flag_op_scalar operation '{}' on string array",
262-
other, stringify!($OP)
263-
),
264-
}.map(
265-
// downcast_dictionary_array duplicates code per possible key type, so we aim to do all prep work before
266-
|evaluated_values| downcast_dictionary_array! {
267-
$LEFT => {
268-
let unpacked_dict = evaluated_values.take_iter($LEFT.keys().iter().map(|opt| opt.map(|v| v as _))).collect::<BooleanArray>();
269-
Arc::new(unpacked_dict) as _
270-
},
271-
_ => unreachable!(),
272-
}
273-
)
274-
},
275-
other => internal_err!(
276-
"Data type {} not supported for binary_string_array_flag_op_scalar operation '{}' on string array",
277-
other, stringify!($OP)
278-
),
279-
};
280-
Some(result)
281-
}};
282-
}
283-
284-
/// Invoke a compute kernel on a data array and a scalar value with flag
285-
macro_rules! compute_utf8_flag_op_scalar {
286-
($LEFT:expr, $RIGHT:expr, $OP:ident, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
287-
let ll = $LEFT
288-
.as_any()
289-
.downcast_ref::<$ARRAYTYPE>()
290-
.expect("compute_utf8_flag_op_scalar failed to downcast array");
291-
292-
let string_value = match $RIGHT.try_as_str() {
293-
Some(Some(string_value)) => string_value,
294-
// null literal or non string
295-
_ => return internal_err!(
296-
"compute_utf8_flag_op_scalar failed to cast literal value {} for operation '{}'",
297-
$RIGHT, stringify!($OP)
298-
)
299-
};
300-
301-
let flag = $FLAG.then_some("i");
302-
let mut array =
303-
paste::expr! {[<$OP _scalar>]}(ll, &string_value, flag)?;
304-
if $NOT {
305-
array = not(&array).unwrap();
306-
}
307-
308-
Ok(Arc::new(array))
309-
}};
310-
}
311-
312-
/// Invoke a compute kernel on a data array and a scalar value with flag
313-
macro_rules! compute_utf8view_flag_op_scalar {
314-
($LEFT:expr, $RIGHT:expr, $OP:ident, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
315-
let ll = $LEFT
316-
.as_any()
317-
.downcast_ref::<$ARRAYTYPE>()
318-
.expect("compute_utf8view_flag_op_scalar failed to downcast array");
319-
320-
let string_value = match $RIGHT.try_as_str() {
321-
Some(Some(string_value)) => string_value,
322-
// null literal or non string
323-
_ => return internal_err!(
324-
"compute_utf8view_flag_op_scalar failed to cast literal value {} for operation '{}'",
325-
$RIGHT, stringify!($OP)
326-
)
327-
};
328-
329-
let flag = $FLAG.then_some("i");
330-
let mut array =
331-
paste::expr! {[<$OP _scalar>]}(ll, &string_value, flag)?;
332-
if $NOT {
333-
array = not(&array).unwrap();
334-
}
335-
336-
Ok(Arc::new(array))
337-
}};
338-
}
339-
340168
impl PhysicalExpr for BinaryExpr {
341169
/// Return a reference to Any that can be used for downcasting
342170
fn as_any(&self) -> &dyn Any {
@@ -752,34 +580,10 @@ impl BinaryExpr {
752580
) -> Result<Option<Result<ArrayRef>>> {
753581
use Operator::*;
754582
let scalar_result = match &self.op {
755-
RegexMatch => binary_string_array_flag_op_scalar!(
756-
array,
757-
scalar,
758-
regexp_is_match,
759-
false,
760-
false
761-
),
762-
RegexIMatch => binary_string_array_flag_op_scalar!(
763-
array,
764-
scalar,
765-
regexp_is_match,
766-
false,
767-
true
768-
),
769-
RegexNotMatch => binary_string_array_flag_op_scalar!(
770-
array,
771-
scalar,
772-
regexp_is_match,
773-
true,
774-
false
775-
),
776-
RegexNotIMatch => binary_string_array_flag_op_scalar!(
777-
array,
778-
scalar,
779-
regexp_is_match,
780-
true,
781-
true
782-
),
583+
RegexMatch => regex_match_dyn_scalar(array, scalar, false, false),
584+
RegexIMatch => regex_match_dyn_scalar(array, scalar, false, true),
585+
RegexNotMatch => regex_match_dyn_scalar(array, scalar, true, false),
586+
RegexNotIMatch => regex_match_dyn_scalar(array, scalar, true, true),
783587
BitwiseAnd => bitwise_and_dyn_scalar(array, scalar),
784588
BitwiseOr => bitwise_or_dyn_scalar(array, scalar),
785589
BitwiseXor => bitwise_xor_dyn_scalar(array, scalar),
@@ -828,18 +632,10 @@ impl BinaryExpr {
828632
)
829633
}
830634
}
831-
RegexMatch => {
832-
binary_string_array_flag_op!(left, right, regexp_is_match, false, false)
833-
}
834-
RegexIMatch => {
835-
binary_string_array_flag_op!(left, right, regexp_is_match, false, true)
836-
}
837-
RegexNotMatch => {
838-
binary_string_array_flag_op!(left, right, regexp_is_match, true, false)
839-
}
840-
RegexNotIMatch => {
841-
binary_string_array_flag_op!(left, right, regexp_is_match, true, true)
842-
}
635+
RegexMatch => regex_match_dyn(left, right, false, false),
636+
RegexIMatch => regex_match_dyn(left, right, false, true),
637+
RegexNotMatch => regex_match_dyn(left, right, true, false),
638+
RegexNotIMatch => regex_match_dyn(left, right, true, true),
843639
BitwiseAnd => bitwise_and_dyn(left, right),
844640
BitwiseOr => bitwise_or_dyn(left, right),
845641
BitwiseXor => bitwise_xor_dyn(left, right),

0 commit comments

Comments
 (0)