Skip to content

Commit

Permalink
Use watchfiles to track cache
Browse files Browse the repository at this point in the history
Catch asyncio.TimeoutError
  • Loading branch information
arenekosreal committed Sep 16, 2024
1 parent 94fc446 commit a5202c1
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 132 deletions.
77 changes: 76 additions & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ requires-python = "<4.0,>=3.12"
dependencies = [
"aiohttp<4.0.0,>=3.9.3",
"defusedxml>=0.7.1",
"watchfiles>=0.24.0",
"deserializer @ git+https://github.com/arenekosreal/deserializer.git",
]
name = "crx-repo"
description = "Download Chrom(e|ium) extensions from Chrome Web Store and serve a update manifest."
Expand Down
2 changes: 1 addition & 1 deletion src/crx_repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from crx_repo.config.parser import parse_config_async as _parse_config_async


__version__ = "0.1.0"
__version__ = "0.2.0"


_logger = logging.getLogger(__name__)
Expand Down
29 changes: 23 additions & 6 deletions src/crx_repo/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
from http import HTTPStatus
from typing import TypeGuard
from aiohttp import ClientError
from pathlib import Path
from urllib.parse import urlencode
from aiohttp.client import ClientSession
Expand Down Expand Up @@ -37,7 +38,7 @@ def __init__(
if self.proxy is not None:
_logger.info("Using proxy %s to download extension...", self.proxy)
self.CHROME_WEB_STORE_API_BASE = "https://clients2.google.com/service/update2/crx"
self.CHUNK_SIZE_BYTES = 10240
self.CHUNK_SIZE_BYTES = 1024 * 1024 # 1MB

async def download_forever(self):
"""Download extension forever."""
Expand All @@ -46,6 +47,12 @@ async def download_forever(self):
await self._do_download()
await asyncio.sleep(self.interval)
except asyncio.CancelledError:
_logger.debug("Cleaning old extensions...")
for p in sorted(
self.cache_path.rglob("*.crx"),
key=lambda p: p.stat().st_mtime,
)[:-1]:
p.unlink()
_logger.debug(
"Stopping downloader for extension %s",
self.extension_id,
Expand All @@ -68,12 +75,21 @@ async def _do_download(self):
if response.content_length != int(size):
_logger.warning("Content-Length is not equals to size returned by API.")
hash_calculator = hashlib.sha256()
extension_path = self.cache_path / (version + ".crx")
extension_path = self.cache_path / (version + ".crx.part")
with extension_path.open("wb") as writer:
async for chunk in response.content.iter_chunked(self.CHUNK_SIZE_BYTES):
_logger.debug("Writing %s byte(s) into %s...", len(chunk), extension_path)
hash_calculator.update(chunk)
_ = writer.write(chunk)
try:
async for chunk in response.content.iter_chunked(self.CHUNK_SIZE_BYTES):
chunk_size = writer.write(chunk)
hash_calculator.update(chunk)
_logger.debug(
"Writing %s byte(s) into %s...",
chunk_size,
extension_path,
)
except ClientError as e:
_logger.error("Failed to download because %s", e)
except asyncio.TimeoutError:
_logger.error("Failed to build because async operation timeout.")
_logger.debug("Checking checksums of extension %s...", self.extension_id)
sha256_hash = hash_calculator.hexdigest()
if sha256_hash != sha256:
Expand All @@ -87,6 +103,7 @@ async def _do_download(self):
"SHA256 checksum of %s match. Keeping file.",
self.extension_id,
)
_ = extension_path.rename(extension_path.parent / extension_path.stem)

async def _check_update(
self,
Expand Down
2 changes: 1 addition & 1 deletion src/crx_repo/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dataclasses import dataclass


LogLevelType = Literal["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
type LogLevelType = Literal["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]


@dataclass
Expand Down
115 changes: 5 additions & 110 deletions src/crx_repo/config/parser/parser.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,19 @@
"""Basic parser implementation."""

import inspect
# pyright: reportAny=false

import logging
from abc import ABC
from abc import abstractmethod
from types import UnionType
from typing import Any
from typing import Literal
from typing import TypeVar
from typing import Callable
from typing import TypeGuard
from typing import get_args
from typing import overload
from typing import get_origin
from pathlib import Path
from crx_repo.config.config import Config


PathOrStr = Path | str
T = TypeVar("T")
ConfigJsonType = dict[str, Any]
KeyConverterType = Callable[[str], str] | None
type PathOrStr = Path | str
type ConfigJsonType = dict[str, str | int | None | ConfigJsonType]
type KeyConverterType = Callable[[str], str] | None

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -55,101 +48,3 @@ async def support_async(self, path: Path) -> bool:
@abstractmethod
async def support_async(self, path: PathOrStr) -> bool:
"""Check if path is supported by the parser."""

@staticmethod
def deserialize(
    cls_: type[T],
    json: ConfigJsonType,
    key_convert: KeyConverterType = None,
) -> T:
    """Deserialize json to a class.

    Args:
        cls_(type[T]): The class itself, it must have a no-argument constructor.
        json(ConfigJsonType): The json data.
        key_convert(KeyConverterType): A converter to convert key between json and class.
            It should accept key in json and return a string,
            which represents the attribute name of cls_ instance.
            It defaults to None, means do not convert.

    Returns:
        T: The instance of cls_

    Raises:
        NotImplementedError: Propagated from the helpers when an annotation uses
            a generic/union form they do not support.
        ValueError: Propagated from _ensure_instanceable() when a union annotation
            contains no instanceable member.

    Remarks:
        Keys without a matching attribute, attributes without a type hint and
        values whose type does not match the hint are skipped (logged at DEBUG).
        This method is slow because it relies on setattr() and hasattr();
        please cache its result to speed up.
    """
    instance = cls_()
    type_of_instance = inspect.get_annotations(cls_)
    for k, v in json.items():  # pyright: ignore[reportAny]
        attr_name = key_convert(k) if key_convert is not None else k
        if not hasattr(instance, attr_name):
            # Unknown keys are silently skipped.
            continue

        type_of_attr = type_of_instance.get(attr_name)
        _logger.debug("Type of %s is %s", k, type_of_attr)
        if type_of_attr is None:
            _logger.debug(
                "%s does not have a type hint, ignoring its deserialization.",
                attr_name,
            )
            continue

        if ConfigParser._is_config_json(v):  # pyright: ignore[reportAny]
            # Nested table: build the nested object from the annotated type.
            _logger.debug("Calling deserialize() recursively.")
            v_deserialized = ConfigParser.deserialize(  # pyright: ignore[reportUnknownVariableType]
                ConfigParser._ensure_instanceable(type_of_attr),  # pyright: ignore[reportAny]
                v,
                key_convert,
            )
            setattr(instance, attr_name, v_deserialized)
            continue

        type_matches = ConfigParser._is_generics_valid(
            v,  # pyright: ignore[reportAny]
            type_of_attr,  # pyright: ignore[reportAny]
        )
        if not type_matches:
            try:
                type_matches = isinstance(v, type_of_attr)
            except TypeError:
                # isinstance() rejects parameterized generics and Literal[...];
                # reaching here means the value already failed the generic check,
                # so treat it as a mismatch instead of crashing.
                type_matches = False

        if type_matches:
            _logger.debug("Type match, assigning value of %s directly.", k)
            setattr(instance, attr_name, v)
        else:
            _logger.debug("Do not know how to deserialize %s, ignoring.", k)
    return instance

@staticmethod
def _is_config_json(obj: object) -> TypeGuard[ConfigJsonType]:
    """Tell whether obj is a dict whose keys are all strings."""
    if not isinstance(obj, dict):
        return False
    for key in obj:  # pyright: ignore[reportUnknownVariableType]
        if not isinstance(key, str):
            return False
    return True

@staticmethod
def _is_generics_valid(v: object, t: type) -> bool:
args = get_args(t)
if len(args) > 0:
origin = get_origin(t)
if origin is Literal or origin is UnionType:
return v in args
if origin is list:
return isinstance(v, list) and ConfigParser._is_list_valid(v, t) # pyright: ignore[reportUnknownArgumentType]
raise NotImplementedError("Unsupported type", origin)
return False

@staticmethod
def _is_list_valid(v: list[T], t: type[list[T]]) -> bool:
    """Check that every item of v is an instance of the first type argument of t.

    An empty list is always accepted, without inspecting t.
    """
    if len(v) == 0:
        return True
    item_type = get_args(t)[0]
    for item in v:
        if not isinstance(item, item_type):
            return False
    return True

@staticmethod
def _ensure_instanceable(
    i: type,
    checker: Callable[[type], bool] = callable,
) -> type:
    """Return a type accepted by checker, unwrapping a union if needed.

    Args:
        i(type): The annotation to inspect.
        checker(Callable[[type], bool]): Predicate deciding whether a type is usable.
            It defaults to the builtin callable().

    Returns:
        type: i itself when checker accepts it,
            otherwise the first member of the union i that checker accepts.

    Raises:
        ValueError: If i is a union and checker accepts none of its members.
        NotImplementedError: If checker rejects i and i is not a union.
    """
    _logger.debug("Ensuring object %s is instanceable...", i)
    if checker(i):
        return i
    if not ConfigParser._is_union_type(i):
        raise NotImplementedError("Unsupported type", i)
    for candidate in get_args(i):  # pyright: ignore[reportAny]
        if checker(candidate):
            return candidate  # pyright: ignore[reportAny]
    raise ValueError("No instanceable object can be extracted in UnionType")

@staticmethod
def _is_union_type(i: type) -> TypeGuard[UnionType]:
args = get_args(i)
if len(args) > 0:
origin = get_origin(i)
return origin is UnionType
return False
3 changes: 2 additions & 1 deletion src/crx_repo/config/parser/toml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import tomllib
from typing import override
from pathlib import Path
from deserializer import deserialize
from crx_repo.config.config import Config
from crx_repo.config.parser.parser import PathOrStr
from crx_repo.config.parser.parser import ConfigParser
Expand All @@ -17,7 +18,7 @@ async def parse_async(self, path: PathOrStr) -> Config:
if path not in self._cache:
content = path.read_text()
config_raw = tomllib.loads(content)
self._cache[path] = TomlConfigParser.deserialize(
self._cache[path] = deserialize(
Config, config_raw,
lambda x: x.replace("-", "_").lower(), # Kebab case to snake case
)
Expand Down
Loading

0 comments on commit a5202c1

Please sign in to comment.