@@ -13,6 +13,8 @@ pub enum CategoricalFunction {
13
13
StartsWith ( String ) ,
14
14
#[ cfg( feature = "strings" ) ]
15
15
EndsWith ( String ) ,
16
+ #[ cfg( feature = "strings" ) ]
17
+ Slice ( i64 , Option < usize > ) ,
16
18
}
17
19
18
20
impl CategoricalFunction {
@@ -28,6 +30,8 @@ impl CategoricalFunction {
28
30
StartsWith ( _) => mapper. with_dtype ( DataType :: Boolean ) ,
29
31
#[ cfg( feature = "strings" ) ]
30
32
EndsWith ( _) => mapper. with_dtype ( DataType :: Boolean ) ,
33
+ #[ cfg( feature = "strings" ) ]
34
+ Slice ( _, _) => mapper. with_dtype ( DataType :: String ) ,
31
35
}
32
36
}
33
37
}
@@ -45,6 +49,8 @@ impl Display for CategoricalFunction {
45
49
StartsWith ( _) => "starts_with" ,
46
50
#[ cfg( feature = "strings" ) ]
47
51
EndsWith ( _) => "ends_with" ,
52
+ #[ cfg( feature = "strings" ) ]
53
+ Slice ( _, _) => "slice" ,
48
54
} ;
49
55
write ! ( f, "cat.{s}" )
50
56
}
@@ -63,6 +69,8 @@ impl From<CategoricalFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
63
69
StartsWith ( prefix) => map ! ( starts_with, prefix. as_str( ) ) ,
64
70
#[ cfg( feature = "strings" ) ]
65
71
EndsWith ( suffix) => map ! ( ends_with, suffix. as_str( ) ) ,
72
+ #[ cfg( feature = "strings" ) ]
73
+ Slice ( offset, length) => map ! ( slice, offset, length) ,
66
74
}
67
75
}
68
76
}
@@ -101,12 +109,14 @@ fn _get_cat_phys_map(ca: &CategoricalChunked) -> (StringChunked, Series) {
101
109
102
110
/// Fast path: apply a string function to the categories of a categorical column and broadcast the
103
111
/// result back to the array.
104
- fn apply_to_cats < F , T > ( ca : & CategoricalChunked , mut op : F ) -> PolarsResult < Column >
112
+ // fn apply_to_cats<F, T>(ca: &CategoricalChunked, mut op: F) -> PolarsResult<Column>
113
+ fn apply_to_cats < F , T > ( c : & Column , mut op : F ) -> PolarsResult < Column >
105
114
where
106
115
F : FnMut ( & StringChunked ) -> ChunkedArray < T > ,
107
116
ChunkedArray < T > : IntoSeries ,
108
117
T : PolarsDataType < HasViews = FalseT , IsStruct = FalseT , IsNested = FalseT > ,
109
118
{
119
+ let ca = c. categorical ( ) ?;
110
120
let ( categories, phys) = _get_cat_phys_map ( ca) ;
111
121
let result = op ( & categories) ;
112
122
// SAFETY: physical idx array is valid.
@@ -116,12 +126,13 @@ where
116
126
117
127
/// Fast path: apply a binary function to the categories of a categorical column and broadcast the
118
128
/// result back to the array.
119
- fn apply_to_cats_binary < F , T > ( ca : & CategoricalChunked , mut op : F ) -> PolarsResult < Column >
129
+ fn apply_to_cats_binary < F , T > ( c : & Column , mut op : F ) -> PolarsResult < Column >
120
130
where
121
131
F : FnMut ( & BinaryChunked ) -> ChunkedArray < T > ,
122
132
ChunkedArray < T > : IntoSeries ,
123
133
T : PolarsDataType < HasViews = FalseT , IsStruct = FalseT , IsNested = FalseT > ,
124
134
{
135
+ let ca = c. categorical ( ) ?;
125
136
let ( categories, phys) = _get_cat_phys_map ( ca) ;
126
137
let result = op ( & categories. as_binary ( ) ) ;
127
138
// SAFETY: physical idx array is valid.
@@ -130,25 +141,38 @@ where
130
141
}
131
142
132
143
/// Byte length of every value in a categorical column.
///
/// Delegates to `apply_to_cats`, so `str_len_bytes` is evaluated on the
/// column's categories and the result is broadcast back to the rows.
///
/// # Errors
/// Propagates whatever error `apply_to_cats` returns for `c`.
#[cfg(feature = "strings")]
fn len_bytes(c: &Column) -> PolarsResult<Column> {
    let byte_len = |categories: &StringChunked| categories.str_len_bytes();
    apply_to_cats(c, byte_len)
}
137
147
138
148
/// Character length of every value in a categorical column.
///
/// Delegates to `apply_to_cats`, so `str_len_chars` is evaluated on the
/// column's categories and the result is broadcast back to the rows.
///
/// # Errors
/// Propagates whatever error `apply_to_cats` returns for `c`.
#[cfg(feature = "strings")]
fn len_chars(c: &Column) -> PolarsResult<Column> {
    let char_len = |categories: &StringChunked| categories.str_len_chars();
    apply_to_cats(c, char_len)
}
143
152
144
153
/// Tests, for every value in a categorical column, whether it starts with `prefix`.
///
/// Delegates to `apply_to_cats`, so the prefix check is evaluated on the
/// column's categories and the result is broadcast back to the rows.
///
/// # Errors
/// Propagates whatever error `apply_to_cats` returns for `c`.
#[cfg(feature = "strings")]
fn starts_with(c: &Column, prefix: &str) -> PolarsResult<Column> {
    let has_prefix = |categories: &StringChunked| categories.starts_with(prefix);
    apply_to_cats(c, has_prefix)
}
149
157
150
158
/// Tests, for every value in a categorical column, whether it ends with `suffix`.
///
/// Delegates to `apply_to_cats_binary`: the comparison is done on the binary
/// representation of the categories, against the raw bytes of `suffix`, and
/// the result is broadcast back to the rows.
///
/// # Errors
/// Propagates whatever error `apply_to_cats_binary` returns for `c`.
#[cfg(feature = "strings")]
fn ends_with(c: &Column, suffix: &str) -> PolarsResult<Column> {
    // Take the byte view of the suffix once, outside the closure.
    let needle = suffix.as_bytes();
    apply_to_cats_binary(c, |categories| categories.as_binary().ends_with(needle))
}
162
+
163
/// Slices every value of a categorical column using `offset` and `length`.
///
/// The slice bounds are computed per category with
/// `substring_ternary_offsets_value` and applied by rewriting the string
/// views; the per-category results are then gathered through the physical
/// index array to produce one output value per row.
///
/// `length == None` is treated as `usize::MAX`, widened to `u64`.
///
/// # Errors
/// Propagates the error from `c.categorical()`.
#[cfg(feature = "strings")]
fn slice(c: &Column, offset: i64, length: Option<usize>) -> PolarsResult<Column> {
    let (categories, phys) = _get_cat_phys_map(c.categorical()?);
    let length = length.unwrap_or(usize::MAX) as u64;

    // SAFETY: NOTE(review) — presumably sound because `update_view` derives the
    // new view from offsets inside `value`; confirm against the `apply_views`
    // contract.
    let sliced = unsafe {
        categories.apply_views(|view, value| {
            let (start, end) = substring_ternary_offsets_value(value, offset, length);
            update_view(view, start, end, value)
        })
    };

    // SAFETY: physical idx array is valid.
    let gathered = unsafe { sliced.take_unchecked(phys.idx().unwrap()) };
    Ok(gathered.into_column())
}
0 commit comments