diff --git a/Readme.md b/Readme.md index 2b99c21..954cba5 100644 --- a/Readme.md +++ b/Readme.md @@ -33,6 +33,7 @@ scalable and reliable applications. This library intends to make it easy to impl - Bloom filters - Different cache invalidation techniques (time-based or tags) - Cache any objects securely with pickle (use [secret](#redis)) +- Reduce memory usage with compression - 2x faster than `aiocache` (with client side caching) ## Usage Example @@ -135,31 +136,29 @@ _Requires [redis](https://github.com/redis/redis-py) package._\ This will use Redis as a storage. This backend uses [pickle](https://docs.python.org/3/library/pickle.html) module to serialize -values, but the cashes can store values with sha1-keyed hash. +values, but the caches can store values with an md5-keyed hash. Use `secret` and `digestmod` parameters to protect your application from security vulnerabilities. - The `digestmod` is a hashing algorithm that can be used: `sum`, `md5` (default), `sha1` and `sha256` - The `secret` is a salt for a hash. Pickle can't serialize any type of object. In case you need to store more complex types +you can use [dill](https://github.com/uqfoundation/dill) - set `pickle_type="dill"`. Dill is more capable than pickle, but slower. -you can use [dill](https://github.com/uqfoundation/dill) - set `pickle_type="dill"`. -Dill is great, but less performance. If you need complex serializer for [sqlalchemy](https://docs.sqlalchemy.org/en/14/core/serializer.html) objects you can set `pickle_type="sqlalchemy"` Use `json` also an option to serialize/deserialize an object, but it very limited (`pickle_type="json"`) Any connection errors are suppressed, to disable it use `suppress=False` - a `CacheBackendInteractionError` will be raised -If you would like to use [client-side cache](https://redis.io/topics/client-side-caching) set `client_side=True` +For some data, it may be useful to use compression. 
Gzip and zlib compression are available; +you can use the `compress_type` parameter to configure it. -Client side cache will add `cashews:` prefix for each key, to customize it use `client_side_prefix` option. +If you would like to use [client-side cache](https://redis.io/topics/client-side-caching) set `client_side=True`. Client side cache will add `cashews:` prefix for each key, to customize it use `client_side_prefix` option. ```python cache.setup("redis://0.0.0.0/?db=1&minsize=10&suppress=false&secret=my_secret", prefix="func") cache.setup("redis://0.0.0.0/2", password="my_pass", socket_connect_timeout=0.1, retry_on_timeout=True, secret="my_secret") -cache.setup("redis://0.0.0.0", client_side=True, client_side_prefix="my_prefix:", pickle_type="dill") +cache.setup("redis://0.0.0.0", client_side=True, client_side_prefix="my_prefix:", pickle_type="dill", compress_type="gzip") ``` For using secure connections to redis (over ssl) uri should have `rediss` as schema @@ -177,10 +176,12 @@ This will use local sqlite databases (with shards) as storage. It is a good choice if you don't want to use redis, but you need a shared storage, or your cache takes a lot of local memory. Also, it is a good choice for client side local storage. 
-You can setup disk cache with [FanoutCache parameters](http://www.grantjenks.com/docs/diskcache/api.html#fanoutcache) +You can setup disk cache with [Cache parameters](https://grantjenks.com/docs/diskcache/api.html#diskcache.diskcache.DEFAULT_SETTINGS) ** Warning ** `cache.scan` and `cache.get_match` does not work with this storage (works only if shards are disabled) +** Warning ** Be careful with the [default settings](https://grantjenks.com/docs/diskcache/api.html#diskcache.diskcache.DEFAULT_SETTINGS) as they contain parameters such as `size_limit` + ```python cache.setup("disk://") cache.setup("disk://?directory=/tmp/cache&timeout=1&shards=0") # disable shards diff --git a/cashews/backends/interface.py b/cashews/backends/interface.py index 36bdd04..8a65995 100644 --- a/cashews/backends/interface.py +++ b/cashews/backends/interface.py @@ -236,6 +236,10 @@ def __init__(self, *args, serializer: Serializer | None = None, **kwargs) -> Non self._serializer = serializer self._on_remove_callbacks: list[OnRemoveCallback] = [] + @property + def serializer(self) -> Serializer | None: + return self._serializer + def on_remove_callback(self, callback: OnRemoveCallback) -> None: self._on_remove_callbacks.append(callback) diff --git a/cashews/compresors.py b/cashews/compresors.py new file mode 100644 index 0000000..ff3862f --- /dev/null +++ b/cashews/compresors.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import gzip +import zlib +from enum import Enum + +from .exceptions import DecompressionError, UnsupportedCompressorError + + +class CompressType(Enum): + NULL = "null" + GZIP = "gzip" + ZLIB = "zlib" + + +class Compressor: + @staticmethod + def compress(value: bytes) -> bytes: + return value + + @staticmethod + def decompress(value: bytes) -> bytes: + return value + + +class GzipCompressor(Compressor): + @staticmethod + def compress(value: bytes) -> bytes: + return gzip.compress(value) + + @staticmethod + def decompress(value: bytes) -> bytes: + try: + return 
gzip.decompress(value) + except (gzip.BadGzipFile, EOFError, zlib.error) as exc: + raise DecompressionError from exc + + +class ZlibCompressor(Compressor): + @staticmethod + def compress(value: bytes) -> bytes: + return zlib.compress(value) + + @staticmethod + def decompress(value: bytes) -> bytes: + try: + return zlib.decompress(value) + except zlib.error as exc: + raise DecompressionError from exc + + +_compressors = { + CompressType.NULL: Compressor, + CompressType.GZIP: GzipCompressor, + CompressType.ZLIB: ZlibCompressor, +} + + +def get_compressor(compress_type: CompressType | None) -> type[Compressor]: + if compress_type is None: + return Compressor + if compress_type not in _compressors: + raise UnsupportedCompressorError + return _compressors[compress_type] diff --git a/cashews/exceptions.py b/cashews/exceptions.py index 33e27d5..21e5623 100644 --- a/cashews/exceptions.py +++ b/cashews/exceptions.py @@ -14,6 +14,14 @@ class UnsupportedPicklerError(CacheError): """Unknown or unsupported pickle type.""" +class UnsupportedCompressorError(CacheError): + """Unknown or unsupported compress type.""" + + +class DecompressionError(CacheError): + """Cached data could not be decompressed.""" + + class UnSecureDataError(CacheError): """Unsecure data in cache storage""" diff --git a/cashews/picklers.py b/cashews/picklers.py index 2af7eff..f93669d 100644 --- a/cashews/picklers.py +++ b/cashews/picklers.py @@ -94,7 +94,7 @@ class PicklerType(Enum): } -def get_pickler(pickler_type: PicklerType): +def get_pickler(pickler_type: PicklerType) -> type[Pickler]: if pickler_type not in _picklers: raise UnsupportedPicklerError() diff --git a/cashews/serialize.py b/cashews/serialize.py index 35284e1..692d276 100644 --- a/cashews/serialize.py +++ b/cashews/serialize.py @@ -2,9 +2,11 @@ import hashlib import hmac +from contextlib import suppress from typing import TYPE_CHECKING -from .exceptions import SignIsMissingError, UnSecureDataError +from .compresors import Compressor, CompressType, get_compressor +from .exceptions 
import DecompressionError, SignIsMissingError, UnSecureDataError from .picklers import Pickler, PicklerType, get_pickler if TYPE_CHECKING: # pragma: no cover @@ -30,7 +32,15 @@ def _to_bytes(value: str | bytes) -> bytes: return value -class HashSigner: +class Signer: + def sign(self, key: Key, value: bytes) -> bytes: + return value + + def check_sign(self, key: Key, value: bytes) -> bytes: + return value + + +class HashSigner(Signer): _digestmods = { b"sha1": _seal(hashlib.sha1), b"md5": _seal(hashlib.md5), @@ -71,14 +81,7 @@ def _get_sign_and_digestmod(self, sign: bytes) -> tuple[bytes, bytes]: return sign, digestmod -class NullSigner: - @staticmethod - def sign(key: Key, value: bytes) -> bytes: - return value - - @staticmethod - def check_sign(key: Key, value: bytes) -> bytes: - return value +NullSigner = Signer class Serializer: @@ -88,13 +91,17 @@ def __init__(self, check_repr=False): self._check_repr = check_repr self._pickler = get_pickler(PicklerType.NULL) self._signer = NullSigner() + self._compressor = get_compressor(CompressType.NULL)() - def set_signer(self, signer): + def set_signer(self, signer: Signer) -> None: self._signer = signer - def set_pickler(self, pickler): + def set_pickler(self, pickler: Pickler) -> None: self._pickler = pickler + def set_compression(self, compressor: Compressor) -> None: + self._compressor = compressor + @classmethod def register_type(cls, klass: type, encoder, decoder): cls._type_mapping[bytes(klass.__name__, "utf8")] = (encoder, decoder) @@ -102,10 +109,14 @@ def register_type(cls, klass: type, encoder, decoder): async def encode(self, backend: Backend, key: Key, value: Value, expire: float | None) -> bytes: # on SET if isinstance(value, int) and not isinstance(value, bool): return value # type: ignore[return-value] + + value = await self._encode(backend, key, value, expire) + value = self._compressor.compress(value) + return self._signer.sign(key, value) + + async def _encode(self, backend: Backend, key: Key, value: 
Value, expire: float | None) -> bytes: _value = await self._custom_encode(backend, key, value, expire) - if _value is not None: - return self._signer.sign(key, _value) - return self._signer.sign(key, self._pickler.dumps(value)) + return self._pickler.dumps(value) if _value is None else _value async def _custom_encode(self, backend, key: Key, value: Value, expire: float | None) -> bytes | None: value_type = bytes(type(value).__name__, "utf8") @@ -127,6 +138,11 @@ async def decode(self, backend: Backend, key: Key, value: bytes, default: Value) except SignIsMissingError: return default + # for backward compatibility we ignore decompression errors because + # compression is a dynamic setting that can change after a value was stored + with suppress(DecompressionError): + value = self._compressor.decompress(value) + try: value = self._decode(value) except self._pickler.UnpicklingError: @@ -180,18 +196,22 @@ def get_serializer( digestmod: str | bytes = b"md5", check_repr: bool = True, pickle_type: PicklerType | None = None, + compress_type: CompressType | str | None = None, ) -> Serializer: _serializer = Serializer(check_repr=check_repr) if secret: _serializer.set_signer(HashSigner(secret, digestmod)) _serializer.set_pickler(_get_pickler(pickle_type or PicklerType.NULL, bool(secret))) + if isinstance(compress_type, str): + compress_type = CompressType(compress_type) + _serializer.set_compression(get_compressor(compress_type)()) return _serializer def _get_pickler(pickle_type: PicklerType, hash_key: bool) -> Pickler: if pickle_type is PicklerType.NULL and hash_key: pickle_type = PicklerType.DEFAULT - return get_pickler(pickle_type) + return get_pickler(pickle_type)() DEFAULT_SERIALIZER = get_serializer(pickle_type=PicklerType.DEFAULT) diff --git a/cashews/wrapper/wrapper.py b/cashews/wrapper/wrapper.py index c4f20ee..be934ae 100644 --- a/cashews/wrapper/wrapper.py +++ b/cashews/wrapper/wrapper.py @@ -6,6 +6,7 @@ from cashews import validation from cashews.backends.interface import Backend from cashews.commands 
import Command +from cashews.compresors import CompressType from cashews.exceptions import NotConfiguredError from cashews.picklers import PicklerType from cashews.serialize import get_serializer @@ -71,6 +72,7 @@ def setup( digestmod=params.pop("digestmod", b"md5"), check_repr=params.pop("check_repr", True), pickle_type=PicklerType(params.pop("pickle_type", pickle_type)), + compress_type=CompressType(params.pop("compress_type", CompressType.NULL)), ) backend = backend_class(**params, serializer=serializer) if disable: diff --git a/tests/test_pickle_serializer.py b/tests/test_pickle_serializer.py index adbb235..789a7c1 100644 --- a/tests/test_pickle_serializer.py +++ b/tests/test_pickle_serializer.py @@ -24,27 +24,32 @@ class TestDC: @pytest.fixture( name="cache", params=[ - "default_md5", - "default_sum", - "default_sha256", - pytest.param("redis_md5", marks=pytest.mark.redis), - pytest.param("redis_sum", marks=pytest.mark.redis), - pytest.param("dill_sum", marks=pytest.mark.integration), - pytest.param("sqlalchemy_sha1", marks=pytest.mark.integration), + "default_md5_null", + "default_sum_zlib", + "default_sha256_null", + pytest.param("redis_md5_null", marks=pytest.mark.redis), + pytest.param("redis_md5_gzip", marks=pytest.mark.redis), + pytest.param("redis_sum_zlib", marks=pytest.mark.redis), + pytest.param("dill_sum_null", marks=pytest.mark.integration), + pytest.param("sqlalchemy_sha1_null", marks=pytest.mark.integration), ], ) async def _cache(request, redis_dsn): - pickle_type, digestmod = request.param.split("_") + pickle_type, digestmod, compress_type = request.param.split("_") if pickle_type == "redis": from cashews.backends.redis import Redis - redis = Redis(redis_dsn, suppress=False, serializer=get_serializer(secret=b"test", digestmod=digestmod)) + redis = Redis( + redis_dsn, + suppress=False, + serializer=get_serializer(secret=b"test", digestmod=digestmod, compress_type=compress_type), + ) await redis.init() await redis.clear() yield redis await 
redis.close() else: - yield Memory(serializer=get_serializer(secret=b"test", digestmod=digestmod)) + yield Memory(serializer=get_serializer(secret=b"test", digestmod=digestmod, compress_type=compress_type)) @pytest.mark.parametrize(