diff --git a/fsspec/asyn.py b/fsspec/asyn.py index e6d5edb9d..496b81188 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -703,25 +703,24 @@ async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs): ): yield _ - async def _glob(self, path, **kwargs): + async def _glob(self, path, maxdepth=None, **kwargs): + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + import re ends = path.endswith("/") path = self._strip_protocol(path) - indstar = path.find("*") if path.find("*") >= 0 else len(path) - indques = path.find("?") if path.find("?") >= 0 else len(path) - indbrace = path.find("[") if path.find("[") >= 0 else len(path) + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) - ind = min(indstar, indques, indbrace) + min_idx = min(idx_star, idx_qmark, idx_brace) detail = kwargs.pop("detail", False) if not has_magic(path): - root = path - depth = 1 - if ends: - path += "/*" - elif await self._exists(path): + if await self._exists(path): if not detail: return [path] else: @@ -731,13 +730,21 @@ async def _glob(self, path, **kwargs): return [] # glob of non-existent returns empty else: return {} - elif "/" in path[:ind]: - ind2 = path[:ind].rindex("/") - root = path[: ind2 + 1] - depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1 + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 else: root = "" - depth = None if "**" in path else path[ind + 1 :].count("/") + 1 + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None allpaths = await self._find( root, maxdepth=depth, 
withdirs=True, detail=True, **kwargs @@ -765,14 +772,23 @@ async def _glob(self, path, **kwargs): ) + "$" ) - pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern) + pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern) + pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern) pattern = re.sub("[*]", "[^/]*", pattern) - pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*")) + pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern) + pattern = re.sub("=DOUBLE_STARS=", ".*", pattern) + pattern = re.compile(pattern) out = { p: allpaths[p] for p in sorted(allpaths) if pattern.match(p.replace("//", "/").rstrip("/")) } + + # Return directories only when the glob ends with a slash + # This is needed for POSIX glob compliance + if ends: + out = {k: v for k, v in out.items() if v["type"] == "directory"} + if detail: return out else: @@ -793,6 +809,12 @@ async def _find(self, path, maxdepth=None, withdirs=False, **kwargs): path = self._strip_protocol(path) out = dict() detail = kwargs.pop("detail", False) + + # Add the root directory if withdirs is requested + # This is needed for POSIX glob compliance + if withdirs and await self._isdir(path): + out[path] = await self._info(path) + # async for? 
async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs): if withdirs: @@ -819,7 +841,7 @@ async def _expand_path(self, path, recursive=False, maxdepth=None): path = [self._strip_protocol(p) for p in path] for p in path: # can gather here if has_magic(p): - bit = set(await self._glob(p)) + bit = set(await self._glob(p, maxdepth=maxdepth)) out |= bit if recursive: # glob call above expanded one depth so if maxdepth is defined diff --git a/fsspec/spec.py b/fsspec/spec.py index 224623960..0eb513032 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -635,8 +635,12 @@ def glob(self, path, maxdepth=None, **kwargs): for p in sorted(allpaths) if pattern.match(p.replace("//", "/").rstrip("/")) } + + # Return directories only when the glob ends with a slash + # This is needed for POSIX glob compliance if ends: out = {k: v for k, v in out.items() if v["type"] == "directory"} + if detail: return out else: