From 7477a53287cf0e84eec563dc3751dff6ad41713e Mon Sep 17 00:00:00 2001
From: Frank Ruis
Date: Mon, 22 Apr 2024 01:55:20 +0200
Subject: [PATCH] wrap prepared_ds_path in str() to avoid TypeError in fsspec package (#1548)

* wrap prepared_ds_path in str() to avoid TypeError in fsspec package

`fsspec` calls `if "::" in path` on `prepared_ds_path`, which will throw an error if it is a `PosixPath` object.

* update test too

---------

Co-authored-by: Wing Lian
---
 src/axolotl/utils/data/sft.py | 2 +-
 tests/test_datasets.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/axolotl/utils/data/sft.py b/src/axolotl/utils/data/sft.py
index 39c50b1a0..dbc4172b4 100644
--- a/src/axolotl/utils/data/sft.py
+++ b/src/axolotl/utils/data/sft.py
@@ -421,7 +421,7 @@ def for_d_in_datasets(dataset_configs):
 
         if cfg.local_rank == 0:
             LOG.info(f"Saving merged prepared dataset to disk... {prepared_ds_path}")
-            dataset.save_to_disk(prepared_ds_path)
+            dataset.save_to_disk(str(prepared_ds_path))
             if cfg.push_dataset_to_hub:
                 LOG.info(
                     f"Saving merged prepared dataset with push_to_hub... {cfg.push_dataset_to_hub}/{ds_hash}"
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 8b7b3dae6..a274b7b89 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -110,7 +110,7 @@ def test_load_from_save_to_disk(self):
         """Usual use case. Verify datasets saved via `save_to_disk` can be loaded."""
         with tempfile.TemporaryDirectory() as tmp_dir:
             tmp_ds_name = Path(tmp_dir) / "tmp_dataset"
-            self.dataset.save_to_disk(tmp_ds_name)
+            self.dataset.save_to_disk(str(tmp_ds_name))
 
             prepared_path = Path(tmp_dir) / "prepared"
             cfg = DictDefault(