@@ -96,6 +96,7 @@ def with_diff_column(self, diff_column: str) -> 'DiffOptions':
96
96
:return: new immutable DiffOptions instance
97
97
:rtype: DiffOptions
98
98
"""
99
+ assert isinstance (diff_column , str ), diff_column
99
100
return dataclasses .replace (self , diff_column = diff_column )
100
101
101
102
def with_left_column_prefix (self , left_column_prefix : str ) -> 'DiffOptions' :
@@ -108,6 +109,7 @@ def with_left_column_prefix(self, left_column_prefix: str) -> 'DiffOptions':
108
109
:return: new immutable DiffOptions instance
109
110
:rtype: DiffOptions
110
111
"""
112
+ assert isinstance (left_column_prefix , str ), left_column_prefix
111
113
return dataclasses .replace (self , left_column_prefix = left_column_prefix )
112
114
113
115
def with_right_column_prefix (self , right_column_prefix : str ) -> 'DiffOptions' :
@@ -120,6 +122,7 @@ def with_right_column_prefix(self, right_column_prefix: str) -> 'DiffOptions':
120
122
:return: new immutable DiffOptions instance
121
123
:rtype: DiffOptions
122
124
"""
125
+ assert isinstance (right_column_prefix , str ), right_column_prefix
123
126
return dataclasses .replace (self , right_column_prefix = right_column_prefix )
124
127
125
128
def with_insert_diff_value (self , insert_diff_value : str ) -> 'DiffOptions' :
@@ -132,6 +135,7 @@ def with_insert_diff_value(self, insert_diff_value: str) -> 'DiffOptions':
132
135
:return: new immutable DiffOptions instance
133
136
:rtype: DiffOptions
134
137
"""
138
+ assert isinstance (insert_diff_value , str ), insert_diff_value
135
139
return dataclasses .replace (self , insert_diff_value = insert_diff_value )
136
140
137
141
def with_change_diff_value (self , change_diff_value : str ) -> 'DiffOptions' :
@@ -144,6 +148,7 @@ def with_change_diff_value(self, change_diff_value: str) -> 'DiffOptions':
144
148
:return: new immutable DiffOptions instance
145
149
:rtype: DiffOptions
146
150
"""
151
+ assert isinstance (change_diff_value , str ), change_diff_value
147
152
return dataclasses .replace (self , change_diff_value = change_diff_value )
148
153
149
154
def with_delete_diff_value (self , delete_diff_value : str ) -> 'DiffOptions' :
@@ -156,6 +161,7 @@ def with_delete_diff_value(self, delete_diff_value: str) -> 'DiffOptions':
156
161
:return: new immutable DiffOptions instance
157
162
:rtype: DiffOptions
158
163
"""
164
+ assert isinstance (delete_diff_value , str ), delete_diff_value
159
165
return dataclasses .replace (self , delete_diff_value = delete_diff_value )
160
166
161
167
def with_nochange_diff_value (self , nochange_diff_value : str ) -> 'DiffOptions' :
@@ -168,6 +174,7 @@ def with_nochange_diff_value(self, nochange_diff_value: str) -> 'DiffOptions':
168
174
:return: new immutable DiffOptions instance
169
175
:rtype: DiffOptions
170
176
"""
177
+ assert isinstance (nochange_diff_value , str ), nochange_diff_value
171
178
return dataclasses .replace (self , nochange_diff_value = nochange_diff_value )
172
179
173
180
def with_change_column (self , change_column : str ) -> 'DiffOptions' :
@@ -180,6 +187,7 @@ def with_change_column(self, change_column: str) -> 'DiffOptions':
180
187
:return: new immutable DiffOptions instance
181
188
:rtype: DiffOptions
182
189
"""
190
+ assert isinstance (change_column , str ), change_column
183
191
return dataclasses .replace (self , change_column = change_column )
184
192
185
193
def without_change_column (self ) -> 'DiffOptions' :
@@ -202,6 +210,7 @@ def with_diff_mode(self, diff_mode: DiffMode) -> 'DiffOptions':
202
210
:return: new immutable DiffOptions instance
203
211
:rtype: DiffOptions
204
212
"""
213
+ assert isinstance (diff_mode , DiffMode ), diff_mode
205
214
return dataclasses .replace (self , diff_mode = diff_mode )
206
215
207
216
def with_sparse_mode (self , sparse_mode : bool ) -> 'DiffOptions' :
@@ -214,12 +223,18 @@ def with_sparse_mode(self, sparse_mode: bool) -> 'DiffOptions':
214
223
:return: new immutable DiffOptions instance
215
224
:rtype: DiffOptions
216
225
"""
226
+ assert isinstance (sparse_mode , bool ), sparse_mode
217
227
return dataclasses .replace (self , sparse_mode = sparse_mode )
218
228
219
229
def with_default_comparator(self, comparator: DiffComparator) -> 'DiffOptions':
    """
    Fluent method to change the default comparator.
    Returns a new instance created via ``dataclasses.replace`` with
    ``default_comparator`` set to the given comparator.

    :param comparator: new default comparator
    :type comparator: DiffComparator
    :return: new immutable DiffOptions instance
    :rtype: DiffOptions
    """
    # Fail early on a wrong argument type; the offending value is the assertion message.
    assert isinstance(comparator, DiffComparator), comparator
    updated_options = dataclasses.replace(self, default_comparator=comparator)
    return updated_options
221
232
222
233
def with_data_type_comparator (self , comparator : DiffComparator , * data_type : DataType ) -> 'DiffOptions' :
234
+ assert isinstance (comparator , DiffComparator ), comparator
235
+ for dt in data_type :
236
+ assert isinstance (dt , DataType ), dt
237
+
223
238
existing_data_types = {dt .simpleString () for dt in data_type if dt in self .data_type_comparators .keys ()}
224
239
if existing_data_types :
225
240
existing_data_types = sorted (list (existing_data_types ))
@@ -231,6 +246,10 @@ def with_data_type_comparator(self, comparator: DiffComparator, *data_type: Data
231
246
return dataclasses .replace (self , data_type_comparators = data_type_comparators )
232
247
233
248
def with_column_name_comparator (self , comparator : DiffComparator , * column_name : str ) -> 'DiffOptions' :
249
+ assert isinstance (comparator , DiffComparator ), comparator
250
+ for cn in column_name :
251
+ assert isinstance (cn , str ), cn
252
+
234
253
existing_column_names = {cn for cn in column_name if cn in self .column_name_comparators .keys ()}
235
254
if existing_column_names :
236
255
existing_column_names = sorted (list (existing_column_names ))
@@ -242,6 +261,7 @@ def with_column_name_comparator(self, comparator: DiffComparator, *column_name:
242
261
return dataclasses .replace (self , column_name_comparators = column_name_comparators )
243
262
244
263
def comparator_for (self , column : StructField ) -> DiffComparator :
264
+ assert isinstance (column , StructField ), column
245
265
cmp = self .column_name_comparators .get (column .name )
246
266
if cmp is None :
247
267
cmp = self .data_type_comparators .get (column .dataType )
@@ -328,14 +348,24 @@ def diff(self, left: DataFrame, right: DataFrame, *id_or_ignore_columns: Union[s
328
348
:type right: DataFrame
329
349
:param id_or_ignore_columns: either id column names or two lists of column names,
330
350
first the id column names, second the ignore column names
331
- :type id_or_ignore_columns: str
351
+ :type * id_or_ignore_columns: str | Iterable[str]
332
352
:return: the diff DataFrame
333
353
:rtype: DataFrame
334
354
"""
335
- if len (id_or_ignore_columns ) == 2 and all ([isinstance (lst , Iterable ) and not isinstance (lst , str ) for lst in id_or_ignore_columns ]):
355
+ assert isinstance (left , DataFrame ), left
356
+ assert isinstance (right , DataFrame ), right
357
+ assert isinstance (id_or_ignore_columns , (str , Iterable )), id_or_ignore_columns
358
+
359
+ if len (id_or_ignore_columns ) == 2 and all (isinstance (lst , Iterable ) and not isinstance (lst , str ) for lst in id_or_ignore_columns ):
336
360
id_columns , ignore_columns = id_or_ignore_columns
337
- else :
361
+ if any (not isinstance (id , str ) for id in id_columns ):
362
+ raise ValueError (f"The id_columns must all be strings: { ', ' .join (type (id ).__name__ for id in id_columns )} " )
363
+ if any (not isinstance (ignore , str ) for ignore in ignore_columns ):
364
+ raise ValueError (f"The ignore_columns must all be strings: { ', ' .join (type (ignore ).__name__ for ignore in ignore_columns )} " )
365
+ elif all (isinstance (lst , str ) for lst in id_or_ignore_columns ):
338
366
id_columns , ignore_columns = (id_or_ignore_columns , [])
367
+ else :
368
+ raise ValueError (f"The id_or_ignore_columns argument must either all be strings or exactly two iterables of strings: { ', ' .join (type (e ).__name__ for e in id_or_ignore_columns )} " )
339
369
340
370
return self ._do_diff (left , right , id_columns , ignore_columns )
341
371
0 commit comments