From a5202c116081e1f549d33a06fa48adfb8aefecc2 Mon Sep 17 00:00:00 2001 From: arenekosreal <17194552+arenekosreal@users.noreply.github.com> Date: Mon, 16 Sep 2024 18:25:58 +0800 Subject: [PATCH] Use watchfiles to track cache Catch asyncio.TimeoutError --- pdm.lock | 77 +++++++++++++++++- pyproject.toml | 2 + src/crx_repo/__init__.py | 2 +- src/crx_repo/client.py | 29 +++++-- src/crx_repo/config/config.py | 2 +- src/crx_repo/config/parser/parser.py | 115 ++------------------------- src/crx_repo/config/parser/toml.py | 3 +- src/crx_repo/server.py | 76 +++++++++++++++--- 8 files changed, 174 insertions(+), 132 deletions(-) diff --git a/pdm.lock b/pdm.lock index dfd93f3..341f31b 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "uvloop"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:eb676d5da4ecd2420ca7d7b64b2f44b5c652eee604769b3847ae1c6d63e1a967" +content_hash = "sha256:5914b33ad6229d80237f5b9f9e9550d437f3a610d4429aba92b06012aa19892d" [[metadata.targets]] requires_python = "~=3.12" @@ -84,6 +84,23 @@ files = [ {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, ] +[[package]] +name = "anyio" +version = "4.4.0" +requires_python = ">=3.8" +summary = "High level compatibility layer for multiple asynchronous event loop implementations" +groups = ["default"] +dependencies = [ + "exceptiongroup>=1.0.2; python_version < \"3.11\"", + "idna>=2.8", + "sniffio>=1.1", + "typing-extensions>=4.1; python_version < \"3.11\"", +] +files = [ + {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, + {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"}, +] + [[package]] name = "attrs" version = "24.2.0" @@ -206,6 +223,15 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = 
"sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "deserializer" +version = "0.1.0" +requires_python = ">=3.12" +git = "https://github.com/arenekosreal/deserializer.git" +revision = "27f33023eb2bf6f8729db967e21e976a7be7a285" +summary = "Deserialize class from dict from json/yaml/toml." +groups = ["default"] + [[package]] name = "frozenlist" version = "1.4.1" @@ -408,6 +434,17 @@ files = [ {file = "ruff-0.6.2.tar.gz", hash = "sha256:239ee6beb9e91feb8e0ec384204a763f36cb53fb895a1a364618c6abb076b3be"}, ] +[[package]] +name = "sniffio" +version = "1.3.1" +requires_python = ">=3.7" +summary = "Sniff out which async library your code is running under" +groups = ["default"] +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + [[package]] name = "uvloop" version = "0.20.0" @@ -424,6 +461,44 @@ files = [ {file = "uvloop-0.20.0.tar.gz", hash = "sha256:4603ca714a754fc8d9b197e325db25b2ea045385e8a3ad05d3463de725fdf469"}, ] +[[package]] +name = "watchfiles" +version = "0.24.0" +requires_python = ">=3.8" +summary = "Simple, modern and high performance file watching and code reload in python." 
+groups = ["default"] +dependencies = [ + "anyio>=3.0.0", +] +files = [ + {file = "watchfiles-0.24.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7211b463695d1e995ca3feb38b69227e46dbd03947172585ecb0588f19b0d87a"}, + {file = "watchfiles-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4b8693502d1967b00f2fb82fc1e744df128ba22f530e15b763c8d82baee15370"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdab9555053399318b953a1fe1f586e945bc8d635ce9d05e617fd9fe3a4687d6"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:34e19e56d68b0dad5cff62273107cf5d9fbaf9d75c46277aa5d803b3ef8a9e9b"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:41face41f036fee09eba33a5b53a73e9a43d5cb2c53dad8e61fa6c9f91b5a51e"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5148c2f1ea043db13ce9b0c28456e18ecc8f14f41325aa624314095b6aa2e9ea"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e4bd963a935aaf40b625c2499f3f4f6bbd0c3776f6d3bc7c853d04824ff1c9f"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c79d7719d027b7a42817c5d96461a99b6a49979c143839fc37aa5748c322f234"}, + {file = "watchfiles-0.24.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:32aa53a9a63b7f01ed32e316e354e81e9da0e6267435c7243bf8ae0f10b428ef"}, + {file = "watchfiles-0.24.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ce72dba6a20e39a0c628258b5c308779b8697f7676c254a845715e2a1039b968"}, + {file = "watchfiles-0.24.0-cp312-none-win32.whl", hash = "sha256:d9018153cf57fc302a2a34cb7564870b859ed9a732d16b41a9b5cb2ebed2d444"}, + {file = "watchfiles-0.24.0-cp312-none-win_amd64.whl", hash = "sha256:551ec3ee2a3ac9cbcf48a4ec76e42c2ef938a7e905a35b42a1267fa4b1645896"}, + {file = 
"watchfiles-0.24.0-cp312-none-win_arm64.whl", hash = "sha256:b52a65e4ea43c6d149c5f8ddb0bef8d4a1e779b77591a458a893eb416624a418"}, + {file = "watchfiles-0.24.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:3d2e3ab79a1771c530233cadfd277fcc762656d50836c77abb2e5e72b88e3a48"}, + {file = "watchfiles-0.24.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:327763da824817b38ad125dcd97595f942d720d32d879f6c4ddf843e3da3fe90"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd82010f8ab451dabe36054a1622870166a67cf3fce894f68895db6f74bbdc94"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d64ba08db72e5dfd5c33be1e1e687d5e4fcce09219e8aee893a4862034081d4e"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1cf1f6dd7825053f3d98f6d33f6464ebdd9ee95acd74ba2c34e183086900a827"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:43e3e37c15a8b6fe00c1bce2473cfa8eb3484bbeecf3aefbf259227e487a03df"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88bcd4d0fe1d8ff43675360a72def210ebad3f3f72cabfeac08d825d2639b4ab"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:999928c6434372fde16c8f27143d3e97201160b48a614071261701615a2a156f"}, + {file = "watchfiles-0.24.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:30bbd525c3262fd9f4b1865cb8d88e21161366561cd7c9e1194819e0a33ea86b"}, + {file = "watchfiles-0.24.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:edf71b01dec9f766fb285b73930f95f730bb0943500ba0566ae234b5c1618c18"}, + {file = "watchfiles-0.24.0-cp313-none-win32.whl", hash = "sha256:f4c96283fca3ee09fb044f02156d9570d156698bc3734252175a38f0e8975f07"}, + {file = "watchfiles-0.24.0-cp313-none-win_amd64.whl", hash = 
"sha256:a974231b4fdd1bb7f62064a0565a6b107d27d21d9acb50c484d2cdba515b9366"}, + {file = "watchfiles-0.24.0.tar.gz", hash = "sha256:afb72325b74fa7a428c009c1b8be4b4d7c2afedafb2982827ef2156646df2fe1"}, +] + [[package]] name = "yarl" version = "1.9.4" diff --git a/pyproject.toml b/pyproject.toml index 2e36e42..41e7176 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,8 @@ requires-python = "<4.0,>=3.12" dependencies = [ "aiohttp<4.0.0,>=3.9.3", "defusedxml>=0.7.1", + "watchfiles>=0.24.0", + "deserializer @ git+https://github.com/arenekosreal/deserializer.git", ] name = "crx-repo" description = "Download Chrom(e|ium) extensions from Chrome Web Store and serve a update manifest." diff --git a/src/crx_repo/__init__.py b/src/crx_repo/__init__.py index 3b728f1..d420eca 100644 --- a/src/crx_repo/__init__.py +++ b/src/crx_repo/__init__.py @@ -21,7 +21,7 @@ from crx_repo.config.parser import parse_config_async as _parse_config_async -__version__ = "0.1.0" +__version__ = "0.2.0" _logger = logging.getLogger(__name__) diff --git a/src/crx_repo/client.py b/src/crx_repo/client.py index ce2da72..ca34702 100644 --- a/src/crx_repo/client.py +++ b/src/crx_repo/client.py @@ -5,6 +5,7 @@ import logging from http import HTTPStatus from typing import TypeGuard +from aiohttp import ClientError from pathlib import Path from urllib.parse import urlencode from aiohttp.client import ClientSession @@ -37,7 +38,7 @@ def __init__( if self.proxy is not None: _logger.info("Using proxy %s to download extension...", self.proxy) self.CHROME_WEB_STORE_API_BASE = "https://clients2.google.com/service/update2/crx" - self.CHUNK_SIZE_BYTES = 10240 + self.CHUNK_SIZE_BYTES = 1024 * 1024 # 1MB async def download_forever(self): """Download extension forever.""" @@ -46,6 +47,12 @@ async def download_forever(self): await self._do_download() await asyncio.sleep(self.interval) except asyncio.CancelledError: + _logger.debug("Cleaning old extensions...") + for p in sorted( + self.cache_path.rglob("*.crx"), + 
key=lambda p: p.stat().st_mtime, )[:-1]: + p.unlink() _logger.debug( "Stopping downloader for extension %s", self.extension_id, @@ -68,12 +75,21 @@ async def _do_download(self): if response.content_length != int(size): _logger.warning("Content-Length is not equals to size returned by API.") hash_calculator = hashlib.sha256() - extension_path = self.cache_path / (version + ".crx") + extension_path = self.cache_path / (version + ".crx.part") with extension_path.open("wb") as writer: - async for chunk in response.content.iter_chunked(self.CHUNK_SIZE_BYTES): - _logger.debug("Writing %s byte(s) into %s...", len(chunk), extension_path) - hash_calculator.update(chunk) - _ = writer.write(chunk) + try: + async for chunk in response.content.iter_chunked(self.CHUNK_SIZE_BYTES): + chunk_size = writer.write(chunk) + hash_calculator.update(chunk) + _logger.debug( + "Writing %s byte(s) into %s...", + chunk_size, + extension_path, + ) + except ClientError as e: + _logger.error("Failed to download because %s", e) + except asyncio.TimeoutError: + _logger.error("Failed to download because async operation timed out.") _logger.debug("Checking checksums of extension %s...", self.extension_id) sha256_hash = hash_calculator.hexdigest() if sha256_hash != sha256: @@ -87,6 +103,7 @@ async def _do_download(self): "SHA256 checksum of %s match. 
Keeping file.", self.extension_id, ) + _ = extension_path.rename(extension_path.parent / extension_path.stem) async def _check_update( self, diff --git a/src/crx_repo/config/config.py b/src/crx_repo/config/config.py index 7aed46e..7528030 100644 --- a/src/crx_repo/config/config.py +++ b/src/crx_repo/config/config.py @@ -5,7 +5,7 @@ from dataclasses import dataclass -LogLevelType = Literal["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] +type LogLevelType = Literal["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] @dataclass diff --git a/src/crx_repo/config/parser/parser.py b/src/crx_repo/config/parser/parser.py index 3f4a356..7e749f8 100644 --- a/src/crx_repo/config/parser/parser.py +++ b/src/crx_repo/config/parser/parser.py @@ -1,26 +1,19 @@ """Basic parser implementation.""" -import inspect +# pyright: reportAny=false + import logging from abc import ABC from abc import abstractmethod -from types import UnionType -from typing import Any -from typing import Literal -from typing import TypeVar from typing import Callable -from typing import TypeGuard -from typing import get_args from typing import overload -from typing import get_origin from pathlib import Path from crx_repo.config.config import Config -PathOrStr = Path | str -T = TypeVar("T") -ConfigJsonType = dict[str, Any] -KeyConverterType = Callable[[str], str] | None +type PathOrStr = Path | str +type ConfigJsonType = dict[str, str | int | None | ConfigJsonType] +type KeyConverterType = Callable[[str], str] | None _logger = logging.getLogger(__name__) @@ -55,101 +48,3 @@ async def support_async(self, path: Path) -> bool: @abstractmethod async def support_async(self, path: PathOrStr) -> bool: """Check if path is supported by the parser.""" - - @staticmethod - def deserialize( - cls_: type[T], - json: ConfigJsonType, - key_convert: KeyConverterType = None, - ) -> T: - """Deserialize json to a class. - - Args: - cls_(type[T]): The class itself, it must have a no-argument constructor. 
- json(ConfigJsonType): The json data. - key_convert(KeyConverterType): A converter to convert key between json and class. - It should accept key in json and return a string, - which represents the attribute name of cls_ instance. - It defaults to None, means do not convert. - - Returns: - T: The instance of cls_ - - Remarks: - This method is slow because using setattr() and getattr(), - please cache its result to speed up. - """ - instance = cls_() - type_of_instance = inspect.get_annotations(cls_) - for k, v in json.items(): # pyright: ignore[reportAny] - attr_name = key_convert(k) if key_convert is not None else k - if hasattr(instance, attr_name): - type_of_attr = type_of_instance.get(attr_name) - _logger.debug("Type of %s is %s", k, type_of_attr) - if type_of_attr is None: - _logger.debug( - "%s does not have a type hint, ignoring its deserialization.", - attr_name, - ) - elif ConfigParser._is_config_json(v): # pyright: ignore[reportAny] - _logger.debug("Calling deserialize() recursively.") - v_deserialized = ConfigParser.deserialize( # pyright: ignore[reportUnknownVariableType] - ConfigParser._ensure_instanceable(type_of_attr), # pyright: ignore[reportAny] - v, - key_convert, - ) - setattr(instance, attr_name, v_deserialized) - elif ConfigParser._is_generics_valid( - v, # pyright: ignore[reportAny] - type_of_attr, # pyright: ignore[reportAny] - ) or isinstance(v, type_of_attr): - _logger.debug("Type match, assigning value of %s directly.", k) - setattr(instance, attr_name, v) - else: - _logger.debug("Do not know how to deserialize %s, ignoring.", k) - return instance - - @staticmethod - def _is_config_json(obj: object) -> TypeGuard[ConfigJsonType]: - return isinstance(obj, dict) and all(isinstance(k, str) for k in obj) # pyright: ignore[reportUnknownVariableType] - - @staticmethod - def _is_generics_valid(v: object, t: type) -> bool: - args = get_args(t) - if len(args) > 0: - origin = get_origin(t) - if origin is Literal or origin is UnionType: - return v in 
args - if origin is list: - return isinstance(v, list) and ConfigParser._is_list_valid(v, t) # pyright: ignore[reportUnknownArgumentType] - raise NotImplementedError("Unsupported type", origin) - return False - - @staticmethod - def _is_list_valid(v: list[T], t: type[list[T]]) -> bool: - return (len(v) == 0) or all(isinstance(value, get_args(t)[0]) for value in v) - - @staticmethod - def _ensure_instanceable( - i: type, - checker: Callable[[type], bool] = callable, - ) -> type: - _logger.debug("Ensuring object %s is instanceable...", i) - if checker(i): - return i - if ConfigParser._is_union_type(i): - args = get_args(i) - matches = (arg for arg in args if checker(arg)) # pyright: ignore[reportAny] - found = next(matches, None) - if found is None: - raise ValueError("No instanceable object can be extracted in UnionType") - return found # pyright: ignore[reportAny] - raise NotImplementedError("Unsupported type", i) - - @staticmethod - def _is_union_type(i: type) -> TypeGuard[UnionType]: - args = get_args(i) - if len(args) > 0: - origin = get_origin(i) - return origin is UnionType - return False diff --git a/src/crx_repo/config/parser/toml.py b/src/crx_repo/config/parser/toml.py index bd2158a..68e5680 100644 --- a/src/crx_repo/config/parser/toml.py +++ b/src/crx_repo/config/parser/toml.py @@ -3,6 +3,7 @@ import tomllib from typing import override from pathlib import Path +from deserializer import deserialize from crx_repo.config.config import Config from crx_repo.config.parser.parser import PathOrStr from crx_repo.config.parser.parser import ConfigParser @@ -17,7 +18,7 @@ async def parse_async(self, path: PathOrStr) -> Config: if path not in self._cache: content = path.read_text() config_raw = tomllib.loads(content) - self._cache[path] = TomlConfigParser.deserialize( + self._cache[path] = deserialize( Config, config_raw, lambda x: x.replace("-", "_").lower(), # Kebab case to snake case ) diff --git a/src/crx_repo/server.py b/src/crx_repo/server.py index 
467711a..f44b9d9 100644 --- a/src/crx_repo/server.py +++ b/src/crx_repo/server.py @@ -1,5 +1,9 @@ """Classes and functions to distribute manifest and crx files.""" +# pyright: reportUnknownMemberType=false +# pyright: reportUnknownVariableType=false +# pyright: reportAny=false + import asyncio import hashlib import logging @@ -12,6 +16,8 @@ from asyncio import CancelledError from asyncio import create_task from pathlib import Path +from watchfiles import Change +from watchfiles import awatch from urllib.parse import unquote from collections.abc import Generator from crx_repo.client import ExtensionDownloader @@ -23,6 +29,7 @@ _logger = logging.getLogger(__name__) +_cache: dict[str, set[str]] = {} def _get_ssl_context(tls: TlsHttpListenConfig | None) -> SSLContext | None: @@ -60,6 +67,33 @@ def _get_filters(xs: list[str]) -> list[tuple[str, str]]: return filters +def _watch_filter(change: Change, path: str) -> bool: + return change != Change.modified and path.endswith(".crx") + + +async def _watch_cache(cache: Path): + try: + async for changes in awatch(cache, watch_filter=_watch_filter): + for (change, path) in changes: + _logger.debug("Updating cache for path %s", path) + p = Path(path) + extension_version = p.stem + extension_id = p.parent.stem + match change: + case Change.added: + if extension_id in _cache: + _cache[extension_id].add(extension_version) + else: + _cache[extension_id] = {extension_version} + case Change.deleted: + if extension_id in _cache and extension_version in _cache[extension_id]: + _cache[extension_id].remove(extension_version) + case _: + pass + except CancelledError: + _logger.info("Stopping watcher...") + + async def _block(): sleep_seconds = 3600 while True: @@ -74,22 +108,29 @@ def _get_crx_info( cache_path: Path, filters: list[tuple[str, str]], ) -> Generator[tuple[str, tuple[str, int, str]], Any, None]: - if len(filters) > 0: - for crx, version in filters: - path = cache_path / crx / (version + ".crx") - if path.is_file(): - content 
= path.read_bytes() - info = (version, len(content), hashlib.sha256(content).hexdigest()) - yield crx, info - else: - for path in cache_path.glob("./*/*.crx"): - crx = path.parent.name - version = path.name.removesuffix(".crx") + if len(filters) == 0: + for crx, versions in _cache.items(): + for version in versions: + filters.append((crx, version)) + + for crx, version in filters: + path = cache_path / crx / (version + ".crx") + if path.is_file(): content = path.read_bytes() info = (version, len(content), hashlib.sha256(content).hexdigest()) yield crx, info +def _gen_cache(cache: Path): + for path in cache.glob("./*/*.crx"): + extension_version = path.stem + extension_id = path.parent.stem + if extension_id in _cache: + _cache[extension_id].add(extension_version) + else: + _cache[extension_id] = {extension_version} + + def setup_server( config: Config, debug: bool = False, @@ -107,7 +148,13 @@ def setup_server( extension_key = web.AppKey(extension, Task[None]) extension_keys.append(extension_key) + watcher_key = web.AppKey("cache-watcher", Task[None]) + async def register_services(app: web.Application): + _gen_cache(cache_path) + + app[watcher_key] = create_task(_watch_cache(cache_path)) + for extension_key in extension_keys: extension_id = config.extensions[extension_keys.index(extension_key)] downloader = ExtensionDownloader( @@ -125,6 +172,11 @@ async def register_services(app: web.Application): _ = app[extension_key].cancel() await app[extension_key] + _ = app[watcher_key].cancel() + await app[watcher_key] + + _cache.clear() + app.cleanup_ctx.append(register_services) prefix = config.prefix if config.prefix.startswith("/") else "/" + config.prefix @@ -140,7 +192,7 @@ async def _handle_manifest(request: web.Request) -> web.Response: filters = _get_filters(xs) for crx, info in _get_crx_info(cache_path, filters): - app = root.find("./app[@appid='%s']".format()) + app = root.find("./app[@appid='{}']".format(crx)) if app is None: app = Element("app") 
app.attrib["appid"] = crx