Skip to content

Commit

Permalink
fix: add missing async code changes
Browse files Browse the repository at this point in the history
  • Loading branch information
john-jam committed Aug 8, 2023
1 parent 2f33e1b commit 6362058
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 18 deletions.
58 changes: 40 additions & 18 deletions fsspec/asyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,25 +703,24 @@ async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
):
yield _

async def _glob(self, path, **kwargs):
async def _glob(self, path, maxdepth=None, **kwargs):
if maxdepth is not None and maxdepth < 1:
raise ValueError("maxdepth must be at least 1")

import re

ends = path.endswith("/")
path = self._strip_protocol(path)
indstar = path.find("*") if path.find("*") >= 0 else len(path)
indques = path.find("?") if path.find("?") >= 0 else len(path)
indbrace = path.find("[") if path.find("[") >= 0 else len(path)
idx_star = path.find("*") if path.find("*") >= 0 else len(path)
idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

ind = min(indstar, indques, indbrace)
min_idx = min(idx_star, idx_qmark, idx_brace)

detail = kwargs.pop("detail", False)

if not has_magic(path):
root = path
depth = 1
if ends:
path += "/*"
elif await self._exists(path):
if await self._exists(path):
if not detail:
return [path]
else:
Expand All @@ -731,13 +730,21 @@ async def _glob(self, path, **kwargs):
return [] # glob of non-existent returns empty
else:
return {}
elif "/" in path[:ind]:
ind2 = path[:ind].rindex("/")
root = path[: ind2 + 1]
depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1
elif "/" in path[:min_idx]:
min_idx = path[:min_idx].rindex("/")
root = path[: min_idx + 1]
depth = path[min_idx + 1 :].count("/") + 1
else:
root = ""
depth = None if "**" in path else path[ind + 1 :].count("/") + 1
depth = path[min_idx + 1 :].count("/") + 1

if "**" in path:
if maxdepth is not None:
idx_double_stars = path.find("**")
depth_double_stars = path[idx_double_stars:].count("/") + 1
depth = depth - depth_double_stars + maxdepth
else:
depth = None

allpaths = await self._find(
root, maxdepth=depth, withdirs=True, detail=True, **kwargs
Expand Down Expand Up @@ -765,14 +772,23 @@ async def _glob(self, path, **kwargs):
)
+ "$"
)
pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern)
pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
pattern = re.sub("[*]", "[^/]*", pattern)
pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*"))
pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
pattern = re.compile(pattern)
out = {
p: allpaths[p]
for p in sorted(allpaths)
if pattern.match(p.replace("//", "/").rstrip("/"))
}

# Return directories only when the glob end by a slash
# This is needed for posix glob compliance
if ends:
out = {k: v for k, v in out.items() if v["type"] == "directory"}

if detail:
return out
else:
Expand All @@ -793,6 +809,12 @@ async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
path = self._strip_protocol(path)
out = dict()
detail = kwargs.pop("detail", False)

# Add the root directory if withdirs is requested
# This is needed for posix glob compliance
if withdirs and await self._isdir(path):
out[path] = await self._info(path)

# async for?
async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
if withdirs:
Expand All @@ -819,7 +841,7 @@ async def _expand_path(self, path, recursive=False, maxdepth=None):
path = [self._strip_protocol(p) for p in path]
for p in path: # can gather here
if has_magic(p):
bit = set(await self._glob(p))
bit = set(await self._glob(p, maxdepth=maxdepth))
out |= bit
if recursive:
# glob call above expanded one depth so if maxdepth is defined
Expand Down
4 changes: 4 additions & 0 deletions fsspec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,8 +635,12 @@ def glob(self, path, maxdepth=None, **kwargs):
for p in sorted(allpaths)
if pattern.match(p.replace("//", "/").rstrip("/"))
}

# Return directories only when the glob end by a slash
# This is needed for posix glob compliance
if ends:
out = {k: v for k, v in out.items() if v["type"] == "directory"}

if detail:
return out
else:
Expand Down

0 comments on commit 6362058

Please sign in to comment.