Skip to content

Commit

Permalink
Store local paths as pathlib paths
Browse files Browse the repository at this point in the history
  • Loading branch information
EpicWink committed Apr 15, 2024
1 parent 5afcd9e commit e57721e
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 50 deletions.
69 changes: 22 additions & 47 deletions src/proxpi/_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import time
import shutil
import logging
import pathlib
import tempfile
import warnings
import functools
import posixpath
import threading
import dataclasses
import typing as t
Expand Down Expand Up @@ -574,7 +574,7 @@ class _CachedFile:

__slots__ = ("path", "size", "n_hits")

path: str
path: pathlib.Path
"""File path."""

size: int
Expand All @@ -584,40 +584,19 @@ class _CachedFile:
"""Number of cache hits."""


def _split_path(
path: str, split: t.Callable[[str], t.Tuple[str, str]]
) -> t.Generator[str, None, None]:
"""Split path into directory components.
Args:
path: path to split
split: path-split functions
Returns:
path parts generator
"""

parent, filename = split(path)
if not filename:
return
if parent:
yield from _split_path(parent, split)
yield filename


class _FileCache:
"""Package files cache."""

max_size: int
cache_dir: str
cache_dir: pathlib.Path
_cache_dir_provided: t.Union[str, None]
_files: t.Dict[str, t.Union[_CachedFile, Thread]]
_evict_lock: threading.Lock

def __init__(
self,
max_size: int,
cache_dir: str = None,
cache_dir: t.Union[str, pathlib.Path] = None,
download_timeout: float = 0.9,
session: requests.Session = None,
):
Expand All @@ -632,7 +611,7 @@ def __init__(
"""

self.max_size = max_size
self.cache_dir = os.path.abspath(cache_dir or tempfile.mkdtemp())
self.cache_dir = pathlib.Path(cache_dir or tempfile.mkdtemp()).absolute()
self.download_timeout = download_timeout
self.session = session or requests.Session()
self._cache_dir_provided = cache_dir
Expand All @@ -648,19 +627,16 @@ def __repr__(self):
)

def __del__(self):
if not self._cache_dir_provided and os.path.isdir(self.cache_dir):
if not self._cache_dir_provided and self.cache_dir.is_dir():
logger.debug(f"Deleting '{self.cache_dir}'")
shutil.rmtree(self.cache_dir)
shutil.rmtree(str(self.cache_dir))

def _populate_files_from_existing_cache_dir(self):
"""Populate from user-provided cache directory."""
for dirpath, _, filenames in os.walk(self.cache_dir):
for filename in filenames:
filepath = os.path.join(dirpath, filename)
size = os.path.getsize(filepath)
name = os.path.relpath(filepath, self.cache_dir)
if os.path != posixpath:
name = posixpath.join(*_split_path(name, os.path.split))
for filepath in self.cache_dir.glob("**/*"):
size = filepath.stat().st_size
name = str(pathlib.PurePosixPath(filepath.relative_to(self.cache_dir)))
if True: # minimise Git diff
self._files[name] = _CachedFile(filepath, size, n_hits=0)

@staticmethod
Expand All @@ -669,9 +645,9 @@ def _get_key(url: str) -> str:
"""Get file cache reference key from file URL."""
urlsplit = urllib.parse.urlsplit(url)
parent = _hostname_normalise_pattern.sub("-", urlsplit.hostname)
return posixpath.join(parent, *_split_path(urlsplit.path, posixpath.split))
return str(pathlib.PurePosixPath(parent) / urlsplit.path[1:])

def _download_file(self, url: str, path: str):
def _download_file(self, url: str, path: pathlib.Path) -> None:
"""Download a file.
Args:
Expand All @@ -688,13 +664,12 @@ def _download_file(self, url: str, path: str):
f"status={response.status_code}, body={response.text}"
)
return
parent, _ = os.path.split(path)
os.makedirs(parent, exist_ok=True)
with open(path, "wb") as f:
path.parent.mkdir(parents=True, exist_ok=True)
with open(str(path), "wb") as f:
for chunk in response.iter_content(None):
f.write(chunk)
key = self._get_key(url)
self._files[key] = _CachedFile(path, os.stat(path).st_size, 0)
self._files[key] = _CachedFile(path, path.stat().st_size, 0)
logger.debug(f"Finished downloading '{url_masked}'")

def _wait_for_existing_download(self, url: str) -> bool:
Expand All @@ -719,19 +694,19 @@ def _wait_for_existing_download(self, url: str) -> bool:
return True # default to original URL (due to timeout or HTTP error)
return False

def _get_cached(self, url: str) -> t.Union[str, None]:
def _get_cached(self, url: str) -> t.Union[pathlib.Path, None]:
"""Get file from cache."""
if url in self._files:
file = self._files[url]
assert isinstance(file, _CachedFile)
file.n_hits += 1
return file.path
return pathlib.Path(file.path)
return None

def _start_downloading(self, url: str):
"""Start downloading a file."""
key = self._get_key(url)
path = os.path.join(self.cache_dir, *_split_path(key, posixpath.split))
path = pathlib.Path(pathlib.PurePosixPath(self.cache_dir) / key)

thread = Thread(target=self._download_file, args=(url, path))
self._files[key] = thread
Expand All @@ -748,10 +723,10 @@ def _evict_lfu(self, url: str):
while existing_size + file_size > self.max_size and existing_size > 0:
existing_url = cache_keys.pop(0)
file = self._files.pop(existing_url)
os.unlink(file.path)
file.path.unlink()
existing_size -= file.size

def get(self, url: str) -> str:
def get(self, url: str) -> t.Union[str, pathlib.Path]:
"""Get a file using or updating cache.
Args:
Expand Down Expand Up @@ -884,7 +859,7 @@ def list_files(self, package_name: str) -> t.List[File]:
raise exc
return files

def get_file(self, package_name: str, file_name: str) -> str:
def get_file(self, package_name: str, file_name: str) -> t.Union[str, pathlib.Path]:
"""Get a file.
Args:
Expand Down
5 changes: 2 additions & 3 deletions src/proxpi/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import gzip
import zlib
import logging
import pathlib
import typing as t
import urllib.parse

import flask
import jinja2
Expand Down Expand Up @@ -203,8 +203,7 @@ def get_file(package_name: str, file_name: str):
except _cache.NotFound:
flask.abort(404)
raise
scheme = urllib.parse.urlparse(path).scheme
if scheme and scheme != "file":
if not isinstance(path, pathlib.Path):
return flask.redirect(path)
return flask.send_file(path, mimetype=_file_mime_type)

Expand Down

0 comments on commit e57721e

Please sign in to comment.