diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py
index 1998042ee3a..c7fcaf1340b 100644
--- a/src/datasets/arrow_dataset.py
+++ b/src/datasets/arrow_dataset.py
@@ -1536,6 +1536,7 @@ def apply_function_on_filtered_inputs(inputs, indices, check_same_num_examples=F
                 writer_batch_size=writer_batch_size,
                 update_features=update_features,
                 fingerprint=new_fingerprint,
+                disable_nullable=disable_nullable,
             )
         else:
             buf_writer = None
@@ -1547,6 +1548,7 @@ def apply_function_on_filtered_inputs(inputs, indices, check_same_num_examples=F
                 writer_batch_size=writer_batch_size,
                 update_features=update_features,
                 fingerprint=new_fingerprint,
+                disable_nullable=disable_nullable,
             )
 
         try:
diff --git a/src/datasets/arrow_writer.py b/src/datasets/arrow_writer.py
index 2f80296d20f..46d7dc2bf66 100644
--- a/src/datasets/arrow_writer.py
+++ b/src/datasets/arrow_writer.py
@@ -274,7 +274,7 @@ def write_batch(
             typed_sequence = TypedSequence(batch_examples[col], type=col_type, try_type=col_try_type)
             typed_sequence_examples[col] = typed_sequence
         pa_table = pa.Table.from_pydict(typed_sequence_examples)
-        self.write_table(pa_table)
+        self.write_table(pa_table, writer_batch_size)
 
     def write_table(self, pa_table: pa.Table, writer_batch_size: Optional[int] = None):
         """Write a batch of Example to file.
diff --git a/src/datasets/inspect.py b/src/datasets/inspect.py
index 61b5881ac61..efaf1df3a63 100644
--- a/src/datasets/inspect.py
+++ b/src/datasets/inspect.py
@@ -38,7 +38,7 @@ def list_datasets(with_community_datasets=True, with_details=False):
     return api.dataset_list(with_community_datasets=with_community_datasets, id_only=bool(not with_details))
 
 
-def list_metrics(with_community_metrics=True, id_only=False, with_details=False):
+def list_metrics(with_community_metrics=True, with_details=False):
     """List all the metrics script available on HuggingFace AWS bucket
 
     Args:
diff --git a/src/datasets/metric.py b/src/datasets/metric.py
index 2697f079843..57823fd1fb0 100644
--- a/src/datasets/metric.py
+++ b/src/datasets/metric.py
@@ -509,7 +509,6 @@ def download_and_prepare(
         self,
         download_config: Optional[DownloadConfig] = None,
         dl_manager: Optional[DownloadManager] = None,
-        **download_and_prepare_kwargs,
     ):
         """Downloads and prepares dataset for reading.
diff --git a/src/datasets/utils/file_utils.py b/src/datasets/utils/file_utils.py
index 2e1ac647928..5cd82546be5 100644
--- a/src/datasets/utils/file_utils.py
+++ b/src/datasets/utils/file_utils.py
@@ -449,7 +449,7 @@ def ftp_head(url, timeout=10.0):
     return True
 
 
-def ftp_get(url, temp_file, proxies=None, resume_size=0, headers=None, cookies=None, timeout=10.0):
+def ftp_get(url, temp_file, timeout=10.0):
     _raise_if_offline_mode_is_enabled(f"Tried to reach {url}")
     try:
         logger.info(f"Getting through FTP {url} into {temp_file.name}")
@@ -651,7 +651,7 @@ def _resumable_file_manager():
 
     # GET file object
     if url.startswith("ftp://"):
-        ftp_get(url, temp_file, proxies=proxies, resume_size=resume_size, headers=headers, cookies=cookies)
+        ftp_get(url, temp_file)
     else:
         http_get(
             url,
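The user-facing effect of the `arrow_dataset.py` and `arrow_writer.py` hunks is that the `disable_nullable` flag is now forwarded from the mapping code into the `ArrowWriter`, and `write_batch` respects the caller's `writer_batch_size` instead of silently dropping it. Below is a minimal usage sketch, not part of this diff, assuming `Dataset.map` exposes `disable_nullable` and `writer_batch_size` keyword arguments as the hunks suggest; the toy data and lambda are illustrative only.

```python
from datasets import Dataset

# Hypothetical toy dataset for illustration.
ds = Dataset.from_dict({"text": ["a", "b", "c"]})

# With this change, disable_nullable and writer_batch_size passed to map()
# reach the underlying ArrowWriter, so the processed Arrow table is written
# with non-nullable fields and the requested batch size.
processed = ds.map(
    lambda example: {"length": len(example["text"])},
    disable_nullable=True,
    writer_batch_size=1000,
)

print(processed.features)
```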