From 54ddc90122c0480a2ea871a752bf1e0f20189c0f Mon Sep 17 00:00:00 2001 From: Nicolas Grandemange <1694892+epizut@users.noreply.github.com> Date: Tue, 4 Jul 2023 16:13:18 +0200 Subject: [PATCH 1/2] Adding support for onerror parameter in walk --- fsspec/asyn.py | 6 ++++-- fsspec/spec.py | 11 +++++++++-- fsspec/tests/test_api.py | 18 ++++++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/fsspec/asyn.py b/fsspec/asyn.py index 5e9911535..6b6b7b821 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -641,7 +641,7 @@ async def _info(self, path, **kwargs): async def _ls(self, path, detail=True, **kwargs): raise NotImplementedError - async def _walk(self, path, maxdepth=None, **kwargs): + async def _walk(self, path, maxdepth=None, onerror=None, **kwargs): if maxdepth is not None and maxdepth < 1: raise ValueError("maxdepth must be at least 1") @@ -653,7 +653,9 @@ async def _walk(self, path, maxdepth=None, **kwargs): detail = kwargs.pop("detail", False) try: listing = await self._ls(path, detail=True, **kwargs) - except (FileNotFoundError, OSError): + except (FileNotFoundError, OSError) as e: + if onerror is not None: + onerror(e) if detail: yield path, {}, {} else: diff --git a/fsspec/spec.py b/fsspec/spec.py index 2bdfa3854..d628d7b12 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -372,7 +372,7 @@ def _ls_from_cache(self, path): except KeyError: pass - def walk(self, path, maxdepth=None, topdown=True, **kwargs): + def walk(self, path, maxdepth=None, topdown=True, onerror=None, **kwargs): """Return all files belows path List all files, recursing into subdirectories; output is iterator-style, @@ -386,6 +386,11 @@ def walk(self, path, maxdepth=None, topdown=True, **kwargs): it resumes walk() again. Modifying dirnames when topdown is False has no effect. (see os.walk) + If optional argument onerror is specified, it should be a function; + it will be called with one argument, an OSError instance. + It can report the error to continue with the walk, + or raise the exception to abort the walk. + Note that the "files" outputted will include anything that is not a directory, such as links. @@ -412,7 +417,9 @@ def walk(self, path, maxdepth=None, topdown=True, **kwargs): detail = kwargs.pop("detail", False) try: listing = self.ls(path, detail=True, **kwargs) - except (FileNotFoundError, OSError): + except (FileNotFoundError, OSError) as e: + if onerror is not None: + onerror(e) if detail: return path, {}, {} return path, [], [] diff --git a/fsspec/tests/test_api.py b/fsspec/tests/test_api.py index ed7aa0d3a..44412465c 100644 --- a/fsspec/tests/test_api.py +++ b/fsspec/tests/test_api.py @@ -4,6 +4,7 @@ import os import pickle import tempfile +from unittest.mock import Mock import pytest @@ -479,3 +480,20 @@ def _walk(*args, **kwargs): (dir12, [], ["file121"]), (dir1, ["dir11", "dir12"], ["file11"]), ] + + # onerror skip by default + assert list(m.walk("do_not_exist")) == [] + # onerror skip function + mock = Mock() + assert list(m.walk("do_not_exist", onerror=mock.onerror)) == [] + mock.onerror.assert_called() + assert mock.onerror.call_args.kwargs == {} + assert len(mock.onerror.call_args.args) == 1 + assert isinstance(mock.onerror.call_args.args[0], FileNotFoundError) + # onerror re-raise function + with pytest.raises(FileNotFoundError): + + def onerror(e): + raise e + + list(m.walk("do_not_exist", onerror=onerror)) From 79b8bd7e859e133cb56a386ac07d09c8cdb78148 Mon Sep 17 00:00:00 2001 From: Nicolas Grandemange <1694892+epizut@users.noreply.github.com> Date: Mon, 17 Jul 2023 14:15:43 +0200 Subject: [PATCH 2/2] Adding support for on_error parameter in walk --- fsspec/asyn.py | 8 +++++--- fsspec/spec.py | 17 +++++++++-------- fsspec/tests/test_api.py | 18 ++++++++---------- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/fsspec/asyn.py b/fsspec/asyn.py index 6b6b7b821..b8f8642a0 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -641,7 +641,7 @@ async def _info(self, path, **kwargs): async def _ls(self, path, detail=True, **kwargs): raise NotImplementedError - async def _walk(self, path, maxdepth=None, onerror=None, **kwargs): + async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs): if maxdepth is not None and maxdepth < 1: raise ValueError("maxdepth must be at least 1") @@ -654,8 +654,10 @@ async def _walk(self, path, maxdepth=None, onerror=None, **kwargs): try: listing = await self._ls(path, detail=True, **kwargs) except (FileNotFoundError, OSError) as e: - if onerror is not None: - onerror(e) + if on_error == "raise": + raise + elif callable(on_error): + on_error(e) if detail: yield path, {}, {} else: diff --git a/fsspec/spec.py b/fsspec/spec.py index d628d7b12..457c082e2 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -372,7 +372,7 @@ def _ls_from_cache(self, path): except KeyError: pass - def walk(self, path, maxdepth=None, topdown=True, onerror=None, **kwargs): + def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs): """Return all files belows path List all files, recursing into subdirectories; output is iterator-style, @@ -386,11 +386,6 @@ def walk(self, path, maxdepth=None, topdown=True, onerror=None, **kwargs): it resumes walk() again. Modifying dirnames when topdown is False has no effect. (see os.walk) - If optional argument onerror is specified, it should be a function; - it will be called with one argument, an OSError instance. - It can report the error to continue with the walk, - or raise the exception to abort the walk. - Note that the "files" outputted will include anything that is not a directory, such as links. @@ -404,6 +399,10 @@ def walk(self, path, maxdepth=None, topdown=True, onerror=None, **kwargs): topdown: bool (True) Whether to walk the directory tree from the top downwards or from the bottom upwards. + on_error: "omit", "raise", a collable + if omit (default), path with exception will simply be empty; + If raise, an underlying exception will be raised; + if callable, it will be called with a single OSError instance as argument kwargs: passed to ``ls`` """ if maxdepth is not None and maxdepth < 1: @@ -418,8 +417,10 @@ def walk(self, path, maxdepth=None, topdown=True, onerror=None, **kwargs): try: listing = self.ls(path, detail=True, **kwargs) except (FileNotFoundError, OSError) as e: - if onerror is not None: - onerror(e) + if on_error == "raise": + raise + elif callable(on_error): + on_error(e) if detail: return path, {}, {} return path, [], [] diff --git a/fsspec/tests/test_api.py b/fsspec/tests/test_api.py index 44412465c..08c499273 100644 --- a/fsspec/tests/test_api.py +++ b/fsspec/tests/test_api.py @@ -481,19 +481,17 @@ def _walk(*args, **kwargs): (dir1, ["dir11", "dir12"], ["file11"]), ] - # onerror skip by default + # on_error omit by default assert list(m.walk("do_not_exist")) == [] - # onerror skip function + # on_error omit + assert list(m.walk("do_not_exist", on_error="omit")) == [] + # on_error raise + with pytest.raises(FileNotFoundError): + list(m.walk("do_not_exist", on_error="raise")) + # on_error callable function mock = Mock() - assert list(m.walk("do_not_exist", onerror=mock.onerror)) == [] + assert list(m.walk("do_not_exist", on_error=mock.onerror)) == [] mock.onerror.assert_called() assert mock.onerror.call_args.kwargs == {} assert len(mock.onerror.call_args.args) == 1 assert isinstance(mock.onerror.call_args.args[0], FileNotFoundError) - # onerror re-raise function - with pytest.raises(FileNotFoundError): - - def onerror(e): - raise e - - list(m.walk("do_not_exist", onerror=onerror))