Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove internal caching due to hash collisions #72

Merged
merged 1 commit into from
Nov 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Changelog

- Remove internal caching due to hash collisions (#71)

#### 0.18.1 - 2020-11-21
- Canonicalise `anyOf` special cases when all subschemas have only the `type` keyword

Expand Down
60 changes: 3 additions & 57 deletions src/hypothesis_jsonschema/_encode.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""Canonical encoding for the JSONSchema semantics, where 1 == 1.0."""
import functools
import json
import math
from json.encoder import _make_iterencode, encode_basestring_ascii # type: ignore
from typing import Any, Callable, Dict, Tuple, Type, Union
from typing import Any, Dict, Tuple, Union

# Mypy does not (yet!) support recursive type definitions.
# (and writing a few steps by hand is a DoS attack on the AST walker in Pytest)
Expand Down Expand Up @@ -36,62 +35,9 @@ def floatstr(o: float) -> str:
)(o, 0)


def _make_cache_key(
value: JSONType,
) -> Tuple[Type, Union[None, bool, float, str, tuple, frozenset]]:
"""Make a hashable object from any JSON value.

The idea is to recursively convert all mutable values to immutable and adding values types as a discriminant.
"""
if isinstance(value, dict):
return (dict, frozenset((k, _make_cache_key(v)) for k, v in value.items()))
if isinstance(value, list):
return (list, tuple(map(_make_cache_key, value)))
# Primitive types are hashable
# `type` is needed to distinguish false-ish values - 0, "", False have the same hash (0)
return (type(value), value)


class HashedJSON:
"""A proxy that holds a JSON value.

Adds a capability for the inner value to be cached, loosely based on `functools._HashedSeq`.
"""

__slots__ = ("value", "hashedvalue")

def __init__(self, value: JSONType):
self.value = value
# `hash` is called multiple times on cache miss, therefore it is evaluated only once
self.hashedvalue = hash(_make_cache_key(value))

def __hash__(self) -> int:
return self.hashedvalue

def __eq__(self, other: "HashedJSON") -> bool: # type: ignore
# TYPES: This class should be used only for caching purposes and there should be
# no values of other types to compare
return self.hashedvalue == other.hashedvalue


def cached_json(func: Callable[[HashedJSON], str]) -> Callable[[JSONType], str]:
"""Cache calls to `encode_canonical_json`.

The same schemas are encoded multiple times during canonicalisation and caching gives visible performance impact.
"""
cached_func = functools.lru_cache(maxsize=1024)(func)

@functools.wraps(cached_func)
def wrapped(value: JSONType) -> str:
return cached_func(HashedJSON(value))

return wrapped


@cached_json
def encode_canonical_json(value: HashedJSON) -> str:
def encode_canonical_json(value: JSONType) -> str:
"""Canonical form serialiser, for uniqueness testing."""
return json.dumps(value.value, sort_keys=True, cls=CanonicalisingJsonEncoder)
return json.dumps(value, sort_keys=True, cls=CanonicalisingJsonEncoder)


def sort_key(value: JSONType) -> Tuple[int, float, Union[float, str]]:
Expand Down