diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index 379cf04cf..1e3bf4fc3 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -695,7 +695,7 @@ def _open(self, path, mode="rb", **kwargs): data = f.read(block) f2.write(data) else: - self.fs.get(path, fn) + self.fs.get_file(path, fn) self.save_cache() return self._open(path, mode) @@ -775,7 +775,7 @@ def _open(self, path, mode="rb", **kwargs): data = f.read(block) f2.write(data) else: - self.fs.get(path, fn) + self.fs.get_file(path, fn) return self._open(path, mode) diff --git a/fsspec/implementations/dirfs.py b/fsspec/implementations/dirfs.py index 3e6def1f5..a3eac87ef 100644 --- a/fsspec/implementations/dirfs.py +++ b/fsspec/implementations/dirfs.py @@ -10,6 +10,8 @@ class DirFileSystem(AsyncFileSystem): delegates everything to the wrapped filesystem. """ + protocol = "dir" + def __init__( self, path=None, @@ -53,7 +55,7 @@ def _join(self, path): return path if not path: return self.path - return self.fs.sep.join((self.path, path)) + return self.fs.sep.join((self.path, self._strip_protocol(path))) return [self._join(_path) for _path in path] def _relpath(self, path): diff --git a/fsspec/implementations/memory.py b/fsspec/implementations/memory.py index fc89615bc..b4b35e46e 100644 --- a/fsspec/implementations/memory.py +++ b/fsspec/implementations/memory.py @@ -137,10 +137,6 @@ def rmdir(self, path): else: raise FileNotFoundError(path) - def exists(self, path, **kwargs): - path = self._strip_protocol(path) - return path in self.store or path in self.pseudo_dirs - def info(self, path, **kwargs): path = self._strip_protocol(path) if path in self.pseudo_dirs or any( @@ -191,11 +187,14 @@ def _open( return f else: raise FileNotFoundError(path) - if mode == "wb": + elif mode == "wb": m = MemoryFile(self, path, kwargs.get("data")) if not self._intrans: m.commit() return m + else: + name = self.__class__.__name__ + raise ValueError(f"unsupported file mode for {name}: {mode!r}") def cp_file(self, path1, path2, **kwargs): path1 = self._strip_protocol(path1) diff --git a/fsspec/implementations/tests/test_dirfs.py b/fsspec/implementations/tests/test_dirfs.py index 402f8f67d..98e7fc6e5 100644 --- a/fsspec/implementations/tests/test_dirfs.py +++ b/fsspec/implementations/tests/test_dirfs.py @@ -372,6 +372,12 @@ def test_glob(dirfs): dirfs.fs.glob.assert_called_once_with(f"{PATH}/*", **KWARGS) +def test_glob_with_protocol(dirfs): + dirfs.fs.glob.return_value = [f"{PATH}/one", f"{PATH}/two"] + assert dirfs.glob("dir://*", **KWARGS) == ["one", "two"] + dirfs.fs.glob.assert_called_once_with(f"{PATH}/*", **KWARGS) + + @pytest.mark.asyncio async def test_async_glob_detail(adirfs): adirfs.fs._glob.return_value = { diff --git a/fsspec/implementations/tests/test_memory.py b/fsspec/implementations/tests/test_memory.py index bb6fd4bfb..5bf1131c9 100644 --- a/fsspec/implementations/tests/test_memory.py +++ b/fsspec/implementations/tests/test_memory.py @@ -58,6 +58,44 @@ def test_directories(m): assert not m.store +def test_exists_isdir_isfile(m): + m.mkdir("/root") + m.touch("/root/a") + + assert m.exists("/root") + assert m.isdir("/root") + assert not m.isfile("/root") + + assert m.exists("/root/a") + assert m.isfile("/root/a") + assert not m.isdir("/root/a") + + assert not m.exists("/root/not-exists") + assert not m.isfile("/root/not-exists") + assert not m.isdir("/root/not-exists") + + m.rm("/root/a") + m.rmdir("/root") + + assert not m.exists("/root") + + m.touch("/a/b") + assert m.isfile("/a/b") + + assert m.exists("/a") + assert m.isdir("/a") + assert not m.isfile("/a") + + +def test_touch(m): + m.touch("/root/a") + with pytest.raises(FileExistsError): + m.touch("/root/a/b") + with pytest.raises(FileExistsError): + m.touch("/root/a/b/c") + assert not m.exists("/root/a/b/") + + def test_mv_recursive(m): m.mkdir("src") m.touch("src/file.txt") diff --git a/fsspec/registry.py b/fsspec/registry.py index 9e464ea4d..09ad63c76 100644 --- a/fsspec/registry.py +++ b/fsspec/registry.py @@ -233,6 +233,14 @@ def get_filesystem_class(protocol): return cls +s3_msg = """Your installed version of s3fs is very old and known to cause +severe performance issues, see also https://github.com/dask/dask/issues/10276 + +To fix, you should specify a lower version bound on s3fs, or +update the current installation. +""" + + def _import_class(cls, minv=None): """Take a string FQP and return the imported class or identifier @@ -240,13 +248,19 @@ def _import_class(cls, minv=None): """ if ":" in cls: mod, name = cls.rsplit(":", 1) + s3 = mod == "s3fs" mod = importlib.import_module(mod) + if s3 and mod.__version__.split(".") < ["0", "5"]: + warnings.warn(s3_msg) for part in name.split("."): mod = getattr(mod, part) return mod else: mod, name = cls.rsplit(".", 1) + s3 = mod == "s3fs" mod = importlib.import_module(mod) + if s3 and mod.__version__.split(".") < ["0", "5"]: + warnings.warn(s3_msg) return getattr(mod, name) diff --git a/fsspec/tests/test_registry.py b/fsspec/tests/test_registry.py index 7e1ab5392..0664912a1 100644 --- a/fsspec/tests/test_registry.py +++ b/fsspec/tests/test_registry.py @@ -121,3 +121,14 @@ def test_filesystem_warning_arrow_hdfs_deprecated(clear_registry, clean_imports) with pytest.warns(DeprecationWarning): filesystem("arrow_hdfs") + + +def test_old_s3(monkeypatch): + from fsspec.registry import _import_class + + s3fs = pytest.importorskip("s3fs") + monkeypatch.setattr(s3fs, "__version__", "0.4.2") + with pytest.warns(): + _import_class("s3fs:S3FileSystem") + with pytest.warns(): + _import_class("s3fs.S3FileSystem")