1717
1818mod kernels;
1919
20- use crate :: expressions:: binary:: kernels:: concat_elements_utf8view;
2120use crate :: intervals:: cp_solver:: { propagate_arithmetic, propagate_comparison} ;
2221use crate :: PhysicalExpr ;
2322use std:: hash:: Hash ;
2423use std:: { any:: Any , sync:: Arc } ;
2524
2625use arrow:: array:: * ;
27- use arrow:: compute:: kernels:: boolean:: { and_kleene, not , or_kleene} ;
26+ use arrow:: compute:: kernels:: boolean:: { and_kleene, or_kleene} ;
2827use arrow:: compute:: kernels:: cmp:: * ;
29- use arrow:: compute:: kernels:: comparison:: { regexp_is_match, regexp_is_match_scalar} ;
3028use arrow:: compute:: kernels:: concat_elements:: concat_elements_utf8;
3129use arrow:: compute:: {
3230 cast, filter_record_batch, ilike, like, nilike, nlike, SlicesIterator ,
@@ -50,6 +48,7 @@ use kernels::{
5048 bitwise_and_dyn, bitwise_and_dyn_scalar, bitwise_or_dyn, bitwise_or_dyn_scalar,
5149 bitwise_shift_left_dyn, bitwise_shift_left_dyn_scalar, bitwise_shift_right_dyn,
5250 bitwise_shift_right_dyn_scalar, bitwise_xor_dyn, bitwise_xor_dyn_scalar,
51+ concat_elements_utf8view, regex_match_dyn, regex_match_dyn_scalar,
5352} ;
5453
5554/// Binary expression
@@ -166,177 +165,6 @@ fn boolean_op(
166165 op ( ll, rr) . map ( |t| Arc :: new ( t) as _ )
167166}
168167
169- macro_rules! binary_string_array_flag_op {
170- ( $LEFT: expr, $RIGHT: expr, $OP: ident, $NOT: expr, $FLAG: expr) => { {
171- match $LEFT. data_type( ) {
172- DataType :: Utf8 => {
173- compute_utf8_flag_op!( $LEFT, $RIGHT, $OP, StringArray , $NOT, $FLAG)
174- } ,
175- DataType :: Utf8View => {
176- compute_utf8view_flag_op!( $LEFT, $RIGHT, $OP, StringViewArray , $NOT, $FLAG)
177- }
178- DataType :: LargeUtf8 => {
179- compute_utf8_flag_op!( $LEFT, $RIGHT, $OP, LargeStringArray , $NOT, $FLAG)
180- } ,
181- other => internal_err!(
182- "Data type {} not supported for binary_string_array_flag_op operation '{}' on string array" ,
183- other, stringify!( $OP)
184- ) ,
185- }
186- } } ;
187- }
188-
189- /// Invoke a compute kernel on a pair of binary data arrays with flags
190- macro_rules! compute_utf8_flag_op {
191- ( $LEFT: expr, $RIGHT: expr, $OP: ident, $ARRAYTYPE: ident, $NOT: expr, $FLAG: expr) => { {
192- let ll = $LEFT
193- . as_any( )
194- . downcast_ref:: <$ARRAYTYPE>( )
195- . expect( "compute_utf8_flag_op failed to downcast array" ) ;
196- let rr = $RIGHT
197- . as_any( )
198- . downcast_ref:: <$ARRAYTYPE>( )
199- . expect( "compute_utf8_flag_op failed to downcast array" ) ;
200-
201- let flag = if $FLAG {
202- Some ( $ARRAYTYPE:: from( vec![ "i" ; ll. len( ) ] ) )
203- } else {
204- None
205- } ;
206- let mut array = $OP( ll, rr, flag. as_ref( ) ) ?;
207- if $NOT {
208- array = not( & array) . unwrap( ) ;
209- }
210- Ok ( Arc :: new( array) )
211- } } ;
212- }
213-
214- /// Invoke a compute kernel on a pair of binary data arrays with flags
215- macro_rules! compute_utf8view_flag_op {
216- ( $LEFT: expr, $RIGHT: expr, $OP: ident, $ARRAYTYPE: ident, $NOT: expr, $FLAG: expr) => { {
217- let ll = $LEFT
218- . as_any( )
219- . downcast_ref:: <$ARRAYTYPE>( )
220- . expect( "compute_utf8view_flag_op failed to downcast array" ) ;
221- let rr = $RIGHT
222- . as_any( )
223- . downcast_ref:: <$ARRAYTYPE>( )
224- . expect( "compute_utf8view_flag_op failed to downcast array" ) ;
225-
226- let flag = if $FLAG {
227- Some ( $ARRAYTYPE:: from( vec![ "i" ; ll. len( ) ] ) )
228- } else {
229- None
230- } ;
231- let mut array = $OP( ll, rr, flag. as_ref( ) ) ?;
232- if $NOT {
233- array = not( & array) . unwrap( ) ;
234- }
235- Ok ( Arc :: new( array) )
236- } } ;
237- }
238-
239- macro_rules! binary_string_array_flag_op_scalar {
240- ( $LEFT: ident, $RIGHT: expr, $OP: ident, $NOT: expr, $FLAG: expr) => { {
241- // This macro is slightly different from binary_string_array_flag_op because, when comparing with a scalar value,
242- // the query can be optimized in such a way that operands will be dicts, so we need to support it here
243- let result: Result <Arc <dyn Array >> = match $LEFT. data_type( ) {
244- DataType :: Utf8 => {
245- compute_utf8_flag_op_scalar!( $LEFT, $RIGHT, $OP, StringArray , $NOT, $FLAG)
246- } ,
247- DataType :: Utf8View => {
248- compute_utf8view_flag_op_scalar!( $LEFT, $RIGHT, $OP, StringViewArray , $NOT, $FLAG)
249- }
250- DataType :: LargeUtf8 => {
251- compute_utf8_flag_op_scalar!( $LEFT, $RIGHT, $OP, LargeStringArray , $NOT, $FLAG)
252- } ,
253- DataType :: Dictionary ( _, _) => {
254- let values = $LEFT. as_any_dictionary( ) . values( ) ;
255-
256- match values. data_type( ) {
257- DataType :: Utf8 => compute_utf8_flag_op_scalar!( values, $RIGHT, $OP, StringArray , $NOT, $FLAG) ,
258- DataType :: Utf8View => compute_utf8view_flag_op_scalar!( values, $RIGHT, $OP, StringViewArray , $NOT, $FLAG) ,
259- DataType :: LargeUtf8 => compute_utf8_flag_op_scalar!( values, $RIGHT, $OP, LargeStringArray , $NOT, $FLAG) ,
260- other => internal_err!(
261- "Data type {} not supported as a dictionary value type for binary_string_array_flag_op_scalar operation '{}' on string array" ,
262- other, stringify!( $OP)
263- ) ,
264- } . map(
265- // downcast_dictionary_array duplicates code per possible key type, so we aim to do all prep work before
266- |evaluated_values| downcast_dictionary_array! {
267- $LEFT => {
268- let unpacked_dict = evaluated_values. take_iter( $LEFT. keys( ) . iter( ) . map( |opt| opt. map( |v| v as _) ) ) . collect:: <BooleanArray >( ) ;
269- Arc :: new( unpacked_dict) as _
270- } ,
271- _ => unreachable!( ) ,
272- }
273- )
274- } ,
275- other => internal_err!(
276- "Data type {} not supported for binary_string_array_flag_op_scalar operation '{}' on string array" ,
277- other, stringify!( $OP)
278- ) ,
279- } ;
280- Some ( result)
281- } } ;
282- }
283-
284- /// Invoke a compute kernel on a data array and a scalar value with flag
285- macro_rules! compute_utf8_flag_op_scalar {
286- ( $LEFT: expr, $RIGHT: expr, $OP: ident, $ARRAYTYPE: ident, $NOT: expr, $FLAG: expr) => { {
287- let ll = $LEFT
288- . as_any( )
289- . downcast_ref:: <$ARRAYTYPE>( )
290- . expect( "compute_utf8_flag_op_scalar failed to downcast array" ) ;
291-
292- let string_value = match $RIGHT. try_as_str( ) {
293- Some ( Some ( string_value) ) => string_value,
294- // null literal or non string
295- _ => return internal_err!(
296- "compute_utf8_flag_op_scalar failed to cast literal value {} for operation '{}'" ,
297- $RIGHT, stringify!( $OP)
298- )
299- } ;
300-
301- let flag = $FLAG. then_some( "i" ) ;
302- let mut array =
303- paste:: expr! { [ <$OP _scalar>] } ( ll, & string_value, flag) ?;
304- if $NOT {
305- array = not( & array) . unwrap( ) ;
306- }
307-
308- Ok ( Arc :: new( array) )
309- } } ;
310- }
311-
312- /// Invoke a compute kernel on a data array and a scalar value with flag
313- macro_rules! compute_utf8view_flag_op_scalar {
314- ( $LEFT: expr, $RIGHT: expr, $OP: ident, $ARRAYTYPE: ident, $NOT: expr, $FLAG: expr) => { {
315- let ll = $LEFT
316- . as_any( )
317- . downcast_ref:: <$ARRAYTYPE>( )
318- . expect( "compute_utf8view_flag_op_scalar failed to downcast array" ) ;
319-
320- let string_value = match $RIGHT. try_as_str( ) {
321- Some ( Some ( string_value) ) => string_value,
322- // null literal or non string
323- _ => return internal_err!(
324- "compute_utf8view_flag_op_scalar failed to cast literal value {} for operation '{}'" ,
325- $RIGHT, stringify!( $OP)
326- )
327- } ;
328-
329- let flag = $FLAG. then_some( "i" ) ;
330- let mut array =
331- paste:: expr! { [ <$OP _scalar>] } ( ll, & string_value, flag) ?;
332- if $NOT {
333- array = not( & array) . unwrap( ) ;
334- }
335-
336- Ok ( Arc :: new( array) )
337- } } ;
338- }
339-
340168impl PhysicalExpr for BinaryExpr {
341169 /// Return a reference to Any that can be used for downcasting
342170 fn as_any ( & self ) -> & dyn Any {
@@ -752,34 +580,10 @@ impl BinaryExpr {
752580 ) -> Result < Option < Result < ArrayRef > > > {
753581 use Operator :: * ;
754582 let scalar_result = match & self . op {
755- RegexMatch => binary_string_array_flag_op_scalar ! (
756- array,
757- scalar,
758- regexp_is_match,
759- false ,
760- false
761- ) ,
762- RegexIMatch => binary_string_array_flag_op_scalar ! (
763- array,
764- scalar,
765- regexp_is_match,
766- false ,
767- true
768- ) ,
769- RegexNotMatch => binary_string_array_flag_op_scalar ! (
770- array,
771- scalar,
772- regexp_is_match,
773- true ,
774- false
775- ) ,
776- RegexNotIMatch => binary_string_array_flag_op_scalar ! (
777- array,
778- scalar,
779- regexp_is_match,
780- true ,
781- true
782- ) ,
583+ RegexMatch => regex_match_dyn_scalar ( array, scalar, false , false ) ,
584+ RegexIMatch => regex_match_dyn_scalar ( array, scalar, false , true ) ,
585+ RegexNotMatch => regex_match_dyn_scalar ( array, scalar, true , false ) ,
586+ RegexNotIMatch => regex_match_dyn_scalar ( array, scalar, true , true ) ,
783587 BitwiseAnd => bitwise_and_dyn_scalar ( array, scalar) ,
784588 BitwiseOr => bitwise_or_dyn_scalar ( array, scalar) ,
785589 BitwiseXor => bitwise_xor_dyn_scalar ( array, scalar) ,
@@ -828,18 +632,10 @@ impl BinaryExpr {
828632 )
829633 }
830634 }
831- RegexMatch => {
832- binary_string_array_flag_op ! ( left, right, regexp_is_match, false , false )
833- }
834- RegexIMatch => {
835- binary_string_array_flag_op ! ( left, right, regexp_is_match, false , true )
836- }
837- RegexNotMatch => {
838- binary_string_array_flag_op ! ( left, right, regexp_is_match, true , false )
839- }
840- RegexNotIMatch => {
841- binary_string_array_flag_op ! ( left, right, regexp_is_match, true , true )
842- }
635+ RegexMatch => regex_match_dyn ( left, right, false , false ) ,
636+ RegexIMatch => regex_match_dyn ( left, right, false , true ) ,
637+ RegexNotMatch => regex_match_dyn ( left, right, true , false ) ,
638+ RegexNotIMatch => regex_match_dyn ( left, right, true , true ) ,
843639 BitwiseAnd => bitwise_and_dyn ( left, right) ,
844640 BitwiseOr => bitwise_or_dyn ( left, right) ,
845641 BitwiseXor => bitwise_xor_dyn ( left, right) ,
0 commit comments