From 17e0140fe147690ecb1c81dbb89dabc8efc6e94e Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 29 Oct 2024 18:49:19 -0700 Subject: [PATCH 01/70] refactor oneOf, allOf --- guidance/library/_json.py | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 0ca544726..fae9d62d7 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -722,6 +722,29 @@ def anyOf( options = [self.json(json_schema=item) for item in anyof_list] return lm + select(options) + @guidance(stateless=True) + def oneOf( + self, + lm, + *, + oneof_list: Sequence[JSONSchema], + ): + if len(oneof_list) == 1: + return lm + self.json(json_schema=oneof_list[0]) + warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") + return lm + self.anyOf(anyof_list=oneof_list) + + @guidance(stateless=True) + def allOf( + self, + lm, + *, + allof_list: Sequence[JSONSchema], + ): + if len(allof_list) != 1: + raise ValueError("Only support allOf with exactly one item") + return lm + self.json(json_schema=allof_list[0]) + @guidance(stateless=True) def const( self, @@ -842,20 +865,13 @@ def json( sibling_keys = get_sibling_keys(json_schema, Keyword.ALLOF) if sibling_keys: raise NotImplementedError(f"allOf with sibling keys is not yet supported. Got {sibling_keys}") - allof_list = json_schema[Keyword.ALLOF] - if len(allof_list) != 1: - raise ValueError("Only support allOf with exactly one item") - return lm + self.json(json_schema=allof_list[0]) + return lm + self.allOf(allof_list=json_schema[Keyword.ALLOF]) if Keyword.ONEOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF) if sibling_keys: raise NotImplementedError(f"oneOf with sibling keys is not yet supported. 
Got {sibling_keys}") - oneof_list = json_schema[Keyword.ONEOF] - if len(oneof_list) == 1: - return lm + self.json(json_schema=oneof_list[0]) - warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") - return lm + self.anyOf(anyof_list=oneof_list) + return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF]) if Keyword.REF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.REF) From dfa5d2583428a11ec5fe9c41eb10b28dd85c9d19 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 29 Oct 2024 20:58:11 -0700 Subject: [PATCH 02/70] allOf tests from the JSON Schema test suite --- tests/unit/library/test_json_allOf.py | 285 ++++++++++++++++++++++++++ 1 file changed, 285 insertions(+) create mode 100644 tests/unit/library/test_json_allOf.py diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py new file mode 100644 index 000000000..6ef14aee3 --- /dev/null +++ b/tests/unit/library/test_json_allOf.py @@ -0,0 +1,285 @@ +from json import dumps as json_dumps + +import pytest +from jsonschema import ValidationError, validate + +from .test_json import check_match_failure, generate_and_check + + +class TestDynamicRefs: + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # allOf + ({"foo": "baz", "bar": 2}, True), + # mismatch second + ({"foo": "baz"}, False), + # mismatch first + ({"bar": 2}, False), + # wrong type + ({"foo": "baz", "bar": "quux"}, False), + ], + ) + def test_allOf(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [ + {"properties": {"bar": {"type": "integer"}}, "required": ["bar"]}, + {"properties": {"foo": {"type": "string"}}, "required": ["foo"]}, + ], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # valid + ({"foo": "quux", "bar": 2, "baz": None}, True), + # mismatch base schema + ({"foo": "quux", "baz": None}, False), + # mismatch first allOf + ({"bar": 2, "baz": None}, False), + # mismatch second allOf + ({"foo": "quux", "bar": 2}, False), + # mismatch both + ({"bar": 2}, False), + ], + ) + def test_allOf_with_base_schema(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"bar": {"type": "integer"}}, + "required": ["bar"], + "allOf": [ + {"properties": {"foo": {"type": "string"}}, "required": ["foo"]}, + {"properties": {"baz": {"type": "null"}}, "required": ["baz"]}, + ], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # valid + (25, True), + # mismatch one + (35, False), + ], + ) + def test_allOf_simple_types(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"maximum": 30}, {"minimum": 20}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any value is valid + ("foo", True) + ], + ) + def test_allOf_with_boolean_schemas_all_true(self, test_object, valid): + schema = {"$schema": "https://json-schema.org/draft/2020-12/schema", "allOf": [True, True]} + if valid: + validate(instance=test_object, schema=schema) + 
generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any value is invalid + ("foo", False) + ], + ) + def test_allOf_with_boolean_schemas_some_false(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [True, False], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any value is invalid + ("foo", False) + ], + ) + def test_allOf_with_boolean_schemas_all_false(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [False, False], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any data is valid + (1, True) + ], + ) + def test_allOf_with_one_empty_schema(self, test_object, valid): + schema = {"$schema": "https://json-schema.org/draft/2020-12/schema", "allOf": [{}]} + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any data is valid + (1, True) + ], + ) + def 
test_allOf_with_two_empty_schemas(self, test_object, valid): + schema = {"$schema": "https://json-schema.org/draft/2020-12/schema", "allOf": [{}, {}]} + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # string is invalid + ("foo", False), + ], + ) + def test_allOf_with_the_first_empty_schema(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{}, {"type": "number"}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # string is invalid + ("foo", False), + ], + ) + def test_allOf_with_the_last_empty_schema(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"type": "number"}, {}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # null is valid + (None, True), + # anything non-null is invalid + (123, False), + ], + ) + def test_nested_allOf_to_check_validation_semantics(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"allOf": [{"type": "null"}]}], + } + if 
valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # allOf: false, anyOf: false, oneOf: false + (1, False), + # allOf: false, anyOf: false, oneOf: true + (5, False), + # allOf: false, anyOf: true, oneOf: false + (3, False), + # allOf: false, anyOf: true, oneOf: true + (15, False), + # allOf: true, anyOf: false, oneOf: false + (2, False), + # allOf: true, anyOf: false, oneOf: true + (10, False), + # allOf: true, anyOf: true, oneOf: false + (6, False), + # allOf: true, anyOf: true, oneOf: true + (30, True), + ], + ) + def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"multipleOf": 2}], + "anyOf": [{"multipleOf": 3}], + "oneOf": [{"multipleOf": 5}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) From b505f1d8a8433e6bcd9ca80515c121ebab5e1647 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 29 Oct 2024 20:59:25 -0700 Subject: [PATCH 03/70] prototype allOf --- guidance/library/_json.py | 148 +++++++++++++++++++++++++++++++++++--- 1 file changed, 138 insertions(+), 10 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index fae9d62d7..9974584d9 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -739,11 +739,91 @@ def allOf( self, lm, *, - allof_list: Sequence[JSONSchema], + parent_schema: JSONSchema, ): - if len(allof_list) != 1: - raise ValueError("Only support allOf with exactly one item") - return lm + 
self.json(json_schema=allof_list[0]) + type = set(JSONType) + properties = {} + required = set() + additional_properties_list = [] + other_data = {} + + def handle_keyword(key: str, value: Any): + nonlocal type + nonlocal required + + if key == Keyword.REF: + raise NotImplementedError("allOf with $ref is not yet supported") + + elif key == Keyword.TYPE: + # TODO: Need to handle type-narrowing correctly: if we have a "number" and an "integer", we should only keep "integer". + # For now, we'll just intersect the types. + value = cast(Union[str, Sequence[str]], value) + if isinstance(value, str): + type = {value} + else: + type &= set(value) + # Throw an error early if we have conflicting types + if not type: + raise ValueError("allOf with conflicting types") + + elif key == Keyword.ALLOF: + value = cast(Sequence[JSONSchema], value) + for schema in value: + add_schema(schema) + + elif key == ObjectKeywords.PROPERTIES: + value = cast(Mapping[str, JSONSchema], value) + for name, schema in value.items(): + if name in properties: + # Will be recursively merged later + properties[name] = {"allOf": [properties[name], schema]} + else: + properties[name] = schema + + elif key == ObjectKeywords.REQUIRED: + value = cast(Sequence[str], value) + required |= set(value) + + elif key == ObjectKeywords.ADDITIONAL_PROPERTIES: + value = cast(JSONSchema, value) + additional_properties_list.append(value) + + elif key in set(Keyword): + # If we've done our job right, we should never hit this case... 
+ raise NotImplementedError(f"Don't yet know how to handle {key} in allOf") + + elif key in other_data: + raise NotImplementedError(f"Don't yet know how to reduce multiple values of {key!r} in allOf") + + else: + other_data[key] = value + + def add_schema(schema: JSONSchema): + nonlocal type + if schema is True: + return + if schema is False: + raise ValueError("allOf contains a False schema") + for key, value in schema.items(): + if key in IGNORED_KEYS: + continue + handle_keyword(key, value) + + add_schema(parent_schema) + + combined_schema = { + Keyword.TYPE: type, + **other_data + } + if properties: + combined_schema[ObjectKeywords.PROPERTIES] = properties + if required: + combined_schema[ObjectKeywords.REQUIRED] = required + if additional_properties_list: + combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": additional_properties_list} + + return lm + self.json(json_schema=combined_schema) + @guidance(stateless=True) def const( @@ -855,18 +935,66 @@ def json( validate_json_node_keys(json_schema) + if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: + parent_schema = json_schema.copy() + anyof_list = parent_schema.pop(Keyword.ANYOF) + allof_list = parent_schema.pop(Keyword.ALLOF) + oneof_list = parent_schema.pop(Keyword.ONEOF) + # Reduce the problem to a oneOf of anyOfs of allOfs + return lm + self.oneOf( + oneof_list=[ + {"anyOf": [ + {"allOf": [one_item, any_item, *allof_list]} + for any_item in anyof_list + ]} + for one_item in oneof_list + ] + ) + + if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema: + parent_schema = json_schema.copy() + anyof_list = parent_schema.pop(Keyword.ANYOF) + allof_list = parent_schema.pop(Keyword.ALLOF) + # Reduce the problem to an anyOf of allOfs + return lm + self.anyOf( + anyof_list=[ + {"allOf": [any_item, *allof_list]} + for any_item in anyof_list + ] + ) + + if Keyword.ALLOF in json_schema and Keyword.ONEOF in json_schema: + parent_schema = 
json_schema.copy() + oneof_list = parent_schema.pop(Keyword.ONEOF) + allof_list = parent_schema.pop(Keyword.ALLOF) + # Reduce the problem to a oneOf of allOfs + return lm + self.oneOf( + anyof_list=[ + {"allOf": [one_item, *allof_list]} + for one_item in oneof_list + ] + ) + + if Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: + parent_schema = json_schema.copy() + oneof_list = parent_schema.pop(Keyword.ONEOF) + anyof_list = parent_schema.pop(Keyword.ANYOF) + # Reduce the problem to a oneOf of anyOfs + return lm + self.oneOf( + oneof_list=[ + {"anyOf": anyof_list} + ] + ) + + if Keyword.ALLOF in json_schema: + return lm + self.allOf(parent_schema=json_schema) + if Keyword.ANYOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ANYOF) if sibling_keys: raise NotImplementedError(f"anyOf with sibling keys is not yet supported. Got {sibling_keys}") return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF]) - if Keyword.ALLOF in json_schema: - sibling_keys = get_sibling_keys(json_schema, Keyword.ALLOF) - if sibling_keys: - raise NotImplementedError(f"allOf with sibling keys is not yet supported. 
Got {sibling_keys}") - return lm + self.allOf(allof_list=json_schema[Keyword.ALLOF]) - if Keyword.ONEOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF) if sibling_keys: From c43c76c6e3776e0ba298b19258946190664830f8 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:27:12 -0700 Subject: [PATCH 04/70] fix nesting of sibling oneOf, allOf, anyOf --- guidance/library/_json.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 9974584d9..afde1d624 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -965,11 +965,11 @@ def json( if Keyword.ALLOF in json_schema and Keyword.ONEOF in json_schema: parent_schema = json_schema.copy() - oneof_list = parent_schema.pop(Keyword.ONEOF) allof_list = parent_schema.pop(Keyword.ALLOF) + oneof_list = parent_schema.pop(Keyword.ONEOF) # Reduce the problem to a oneOf of allOfs return lm + self.oneOf( - anyof_list=[ + oneof_list=[ {"allOf": [one_item, *allof_list]} for one_item in oneof_list ] @@ -977,12 +977,14 @@ def json( if Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: parent_schema = json_schema.copy() - oneof_list = parent_schema.pop(Keyword.ONEOF) anyof_list = parent_schema.pop(Keyword.ANYOF) - # Reduce the problem to a oneOf of anyOfs + oneof_list = parent_schema.pop(Keyword.ONEOF) + # Reduce the problem to a oneOf of allOfs return lm + self.oneOf( oneof_list=[ - {"anyOf": anyof_list} + {"allOf": [one_item, any_item]} + for any_item in anyof_list + for one_item in oneof_list ] ) From be2492123b32cd8b8b5bd3242be9fe1b64dc35aa Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:30:27 -0700 Subject: [PATCH 05/70] pass parent schema down to allOf --- guidance/library/_json.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index afde1d624..c4edb4e31 100644 --- 
a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -944,7 +944,7 @@ def json( return lm + self.oneOf( oneof_list=[ {"anyOf": [ - {"allOf": [one_item, any_item, *allof_list]} + {"allOf": [one_item, any_item, *allof_list], **parent_schema} for any_item in anyof_list ]} for one_item in oneof_list @@ -958,7 +958,7 @@ def json( # Reduce the problem to an anyOf of allOfs return lm + self.anyOf( anyof_list=[ - {"allOf": [any_item, *allof_list]} + {"allOf": [any_item, *allof_list], **parent_schema} for any_item in anyof_list ] ) @@ -970,7 +970,7 @@ def json( # Reduce the problem to a oneOf of allOfs return lm + self.oneOf( oneof_list=[ - {"allOf": [one_item, *allof_list]} + {"allOf": [one_item, *allof_list], **parent_schema} for one_item in oneof_list ] ) @@ -979,10 +979,11 @@ def json( parent_schema = json_schema.copy() anyof_list = parent_schema.pop(Keyword.ANYOF) oneof_list = parent_schema.pop(Keyword.ONEOF) + assert Keyword.ALLOF not in parent_schema # Reduce the problem to a oneOf of allOfs return lm + self.oneOf( oneof_list=[ - {"allOf": [one_item, any_item]} + {"allOf": [one_item, any_item], **parent_schema} for any_item in anyof_list for one_item in oneof_list ] From f4d37897e9496545ae009a66e699cf54e8be7d99 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:35:52 -0700 Subject: [PATCH 06/70] validate node keys when recursively calling add_schema --- guidance/library/_json.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index c4edb4e31..2eeac46be 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -804,6 +804,9 @@ def add_schema(schema: JSONSchema): return if schema is False: raise ValueError("allOf contains a False schema") + # Validate the schema's keys (we have only validated the parent schema's keys so far) + # TODO: This will make us validate the parent twice... 
should probably be refactored + validate_json_node_keys(schema) for key, value in schema.items(): if key in IGNORED_KEYS: continue From 1102bc291515e9b103447df7a12b86b5fe5db0b0 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:36:18 -0700 Subject: [PATCH 07/70] no longer nonlocal --- guidance/library/_json.py | 1 - 1 file changed, 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 2eeac46be..34f4f1cc5 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -799,7 +799,6 @@ def handle_keyword(key: str, value: Any): other_data[key] = value def add_schema(schema: JSONSchema): - nonlocal type if schema is True: return if schema is False: From b7043bae4373f8ddca0f6296fc2a61a662f62b51 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:53:31 -0700 Subject: [PATCH 08/70] punt to allOf for handling sibling keys --- guidance/library/_json.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 34f4f1cc5..30ddcef47 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -996,21 +996,42 @@ def json( if Keyword.ANYOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ANYOF) - if sibling_keys: - raise NotImplementedError(f"anyOf with sibling keys is not yet supported. 
Got {sibling_keys}") - return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF]) + if not sibling_keys: + return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF]) + # Let the allOf function handle anyOfs with sibling keys + parent_schema = json_schema.copy() + anyof_list = parent_schema.pop(Keyword.ANYOF) + return lm + self.anyOf( + anyof_list=[ + {"allOf": [any_item], **parent_schema} + for any_item in anyof_list + ] + ) if Keyword.ONEOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF) - if sibling_keys: - raise NotImplementedError(f"oneOf with sibling keys is not yet supported. Got {sibling_keys}") - return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF]) + if not sibling_keys: + return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF]) + # Let the allOf function handle oneOfs with sibling keys + parent_schema = json_schema.copy() + oneof_list = parent_schema.pop(Keyword.ONEOF) + assert Keyword.ALLOF not in parent_schema + return lm + self.oneOf( + oneof_list=[ + {"allOf": [one_item], **parent_schema} + for one_item in oneof_list + ] + ) if Keyword.REF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.REF) - if sibling_keys: - raise NotImplementedError(f"$ref with sibling keys is not yet supported. 
Got {sibling_keys}") - return lm + self.ref(reference=json_schema[Keyword.REF]) + if not sibling_keys: + return lm + self.ref(reference=json_schema[Keyword.REF]) + # Let the allOf function handle refs with sibling keys + parent_schema = json_schema.copy() + ref = parent_schema.pop(Keyword.REF) + assert Keyword.ALLOF not in parent_schema + return lm + self.allOf(parent_schema={"allOf": [{Keyword.REF: ref}], **parent_schema}) if Keyword.CONST in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM} From 79b8ffb3d2ad73d33cc65d3d223fb47019cb32a9 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:54:38 -0700 Subject: [PATCH 09/70] ref in allof --- guidance/library/_json.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 30ddcef47..441986797 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -747,12 +747,23 @@ def allOf( additional_properties_list = [] other_data = {} + resolver = self._resolver.lookup(self._base_uri).resolver + def handle_keyword(key: str, value: Any): nonlocal type nonlocal required + nonlocal resolver if key == Keyword.REF: - raise NotImplementedError("allOf with $ref is not yet supported") + value = cast(str, value) + resolved = resolver.lookup(value) + # Some funky resolver scope to handle here... We have to pretend to be the original schema + # TODO: we have a totally separate REF implementation for when we have no sibling keys. Need to refactor. + # TODO: this will probably break if we have a recursive reference in an allOf + old_resolver = resolver + resolver = resolved.resolver + add_schema(resolved.contents) + resolver = old_resolver elif key == Keyword.TYPE: # TODO: Need to handle type-narrowing correctly: if we have a "number" and an "integer", we should only keep "integer". 
From 02d12be6ddb60012f7a21a3a1add526e5e8fe2e5 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:54:52 -0700 Subject: [PATCH 10/70] types --- guidance/library/_json.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 441986797..f1d8c8c03 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -33,7 +33,8 @@ from ._pydantic import pydantic_to_json_schema from ._subgrammar import as_regular_grammar, lexeme, subgrammar -JSONSchema = Union[bool, Mapping[str, Any]] +JSONValue = Union[None, bool, int, float, str, Mapping[str, "JSONValue"], Sequence["JSONValue"]] +JSONSchema = Union[bool, Mapping[str, JSONValue]] DRAFT202012_RESERVED_KEYWORDS = { # Anchors and References @@ -749,7 +750,7 @@ def allOf( resolver = self._resolver.lookup(self._base_uri).resolver - def handle_keyword(key: str, value: Any): + def handle_keyword(key: str, value: JSONValue): nonlocal type nonlocal required nonlocal resolver From e2518220097af338561b9082801ab5fd09e12b6d Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 10:14:08 -0700 Subject: [PATCH 11/70] type narrowing --- guidance/library/_json.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index f1d8c8c03..c95de7249 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -771,9 +771,13 @@ def handle_keyword(key: str, value: JSONValue): # For now, we'll just intersect the types. 
value = cast(Union[str, Sequence[str]], value) if isinstance(value, str): - type = {value} + value_set = {value} else: - type &= set(value) + value_set = set(value) + if JSONType.NUMBER in value_set: + # Number implies integer + value_set.add(JSONType.INTEGER) + type &= value_set # Throw an error early if we have conflicting types if not type: raise ValueError("allOf with conflicting types") From 558a2369d7eb09952b9933f7c15551a97918c33d Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 10:17:00 -0700 Subject: [PATCH 12/70] fix test that was supposed to fail under old logic but now passes --- tests/unit/library/test_json.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index d320fdb4f..4448a12bd 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -2240,24 +2240,12 @@ def test_allOf_ref(self): generate_and_check(target_obj, schema_obj) def test_allOf_bad_schema(self): - schema = """{ - "allOf" : [{ "type": "integer" }, { "type": "number" }] + schema = { + "allOf" : [{ "type": "integer" }, { "type": "string" }] } - """ - # First sanity check what we're setting up - schema_obj = json.loads(schema) - - TARGET_VALUE = 20 - validate(instance=TARGET_VALUE, schema=schema_obj) - - prepared_string = f"{json_dumps(TARGET_VALUE)}" - lm = models.Mock(prepared_string.encode()) - - # Run with the mock model - CAPTURE_KEY = "my_capture" with pytest.raises(ValueError) as ve: - lm += gen_json(name=CAPTURE_KEY, schema=schema_obj) - assert ve.value.args[0] == "Only support allOf with exactly one item" + _ = gen_json(schema=schema) + assert ve.value.args[0] == "allOf with conflicting types" class TestOneOf: @pytest.mark.parametrize("target_obj", [123, 42]) From 7296061476e7db2622dbf736a287321a8dff3adf Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 10:18:49 -0700 Subject: [PATCH 13/70] un xfail ref with 
siblings --- tests/unit/library/test_json.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index 4448a12bd..f1e38f6b8 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -1273,7 +1273,6 @@ def test_nested_refs(self, test_object, valid): ({"foo": "string"}, False), ], ) - @pytest.mark.xfail(reason="sibling keywords to ref are not yet supported") def test_ref_applies_alongside_sibling_keywords(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", From cf6eb15e312512e3af3887e84d261a3e982347f9 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 10:34:24 -0700 Subject: [PATCH 14/70] remove xfails for siblings (still failing but for wrong reason...) --- tests/unit/library/test_json.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index f1e38f6b8..52672730c 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -1562,7 +1562,6 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( ({"foo": {"bar": "a"}, "bar": "a"}, True), ], ) - @pytest.mark.xfail(reason="refs with sibling keywords are not yet supported") def test_refs_with_relative_uris_and_defs(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", @@ -1595,7 +1594,6 @@ def test_refs_with_relative_uris_and_defs(self, test_object, valid): ({"foo": {"bar": "a"}, "bar": "a"}, True), ], ) - @pytest.mark.xfail(reason="refs with sibling keywords are not yet supported") def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", From 5c1df52b378015a4dc605a418b7be3e7f6f0829a Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 12:13:01 -0700 Subject: [PATCH 15/70] drop todo --- guidance/library/_json.py | 2 
-- 1 file changed, 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index c95de7249..b7fe8b86a 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -767,8 +767,6 @@ def handle_keyword(key: str, value: JSONValue): resolver = old_resolver elif key == Keyword.TYPE: - # TODO: Need to handle type-narrowing correctly: if we have a "number" and an "integer", we should only keep "integer". - # For now, we'll just intersect the types. value = cast(Union[str, Sequence[str]], value) if isinstance(value, str): value_set = {value} From 19aff29ab1ccec7039bf5477b0ca4ebb0d4a298e Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 12:27:44 -0700 Subject: [PATCH 16/70] items --- guidance/library/_json.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index b7fe8b86a..40c08e2db 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -743,10 +743,11 @@ def allOf( parent_schema: JSONSchema, ): type = set(JSONType) - properties = {} - required = set() - additional_properties_list = [] - other_data = {} + properties: dict[str, JSONSchema] = {} + required: set[str] = set() + additional_properties_list: list[JSONSchema] = [] + items_list: list[JSONSchema] = [] + other_data: dict[str, JSONValue] = {} resolver = self._resolver.lookup(self._base_uri).resolver @@ -799,9 +800,15 @@ def handle_keyword(key: str, value: JSONValue): required |= set(value) elif key == ObjectKeywords.ADDITIONAL_PROPERTIES: + # TODO: do the additionalProperties of one schema need to evaluate against the properties of another? + # TODO: unevaluatedProperties? value = cast(JSONSchema, value) additional_properties_list.append(value) + elif key == ArrayKeywords.ITEMS: + value = cast(JSONSchema, value) + items_list.append(value) + elif key in set(Keyword): # If we've done our job right, we should never hit this case... 
raise NotImplementedError(f"Don't yet know how to handle {key} in allOf") @@ -837,6 +844,8 @@ def add_schema(schema: JSONSchema): combined_schema[ObjectKeywords.REQUIRED] = required if additional_properties_list: combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": additional_properties_list} + if items_list: + combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} return lm + self.json(json_schema=combined_schema) From fa8617811941cf209ffe0754f8523bcb3d3dc8a2 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 12:29:11 -0700 Subject: [PATCH 17/70] safer update --- guidance/library/_json.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 40c08e2db..912d8e214 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -836,7 +836,6 @@ def add_schema(schema: JSONSchema): combined_schema = { Keyword.TYPE: type, - **other_data } if properties: combined_schema[ObjectKeywords.PROPERTIES] = properties @@ -847,6 +846,9 @@ def add_schema(schema: JSONSchema): if items_list: combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} + assert not set(combined_schema) & set(other_data) + combined_schema.update(other_data) + return lm + self.json(json_schema=combined_schema) From 70d27ec78c395faf17628f1b1369c99f7d5eaa35 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 13:30:00 -0700 Subject: [PATCH 18/70] defaultdict for allOf properties --- guidance/library/_json.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 912d8e214..d16cc8885 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -16,6 +16,7 @@ import warnings import referencing import contextlib +from collections import defaultdict from urllib.parse import urljoin try: @@ -743,7 +744,7 @@ def allOf( parent_schema: JSONSchema, ): type = set(JSONType) - 
properties: dict[str, JSONSchema] = {} + properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) required: set[str] = set() additional_properties_list: list[JSONSchema] = [] items_list: list[JSONSchema] = [] @@ -789,11 +790,7 @@ def handle_keyword(key: str, value: JSONValue): elif key == ObjectKeywords.PROPERTIES: value = cast(Mapping[str, JSONSchema], value) for name, schema in value.items(): - if name in properties: - # Will be recursively merged later - properties[name] = {"allOf": [properties[name], schema]} - else: - properties[name] = schema + properties[name].append(schema) elif key == ObjectKeywords.REQUIRED: value = cast(Sequence[str], value) @@ -838,7 +835,9 @@ def add_schema(schema: JSONSchema): Keyword.TYPE: type, } if properties: - combined_schema[ObjectKeywords.PROPERTIES] = properties + combined_schema[ObjectKeywords.PROPERTIES] = {} + for name, schemas in properties.items(): + combined_schema[ObjectKeywords.PROPERTIES][name] = {"allOf": schemas} if required: combined_schema[ObjectKeywords.REQUIRED] = required if additional_properties_list: From 0d5b053c883ed080d4fece9048884a4f6d8b24e1 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 15:58:40 -0700 Subject: [PATCH 19/70] flatten allOfs when possible --- guidance/library/_json.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index d16cc8885..790e9734b 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -837,13 +837,22 @@ def add_schema(schema: JSONSchema): if properties: combined_schema[ObjectKeywords.PROPERTIES] = {} for name, schemas in properties.items(): - combined_schema[ObjectKeywords.PROPERTIES][name] = {"allOf": schemas} + if len(schemas) == 1: + combined_schema[ObjectKeywords.PROPERTIES][name] = schemas[0] + else: + combined_schema[ObjectKeywords.PROPERTIES][name] = {"allOf": schemas} if required: combined_schema[ObjectKeywords.REQUIRED] = required if 
additional_properties_list: - combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": additional_properties_list} + if len(additional_properties_list) == 1: + combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = additional_properties_list[0] + else: + combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": additional_properties_list} if items_list: - combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} + if len(items_list) == 1: + combined_schema[ArrayKeywords.ITEMS] = items_list[0] + else: + combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} assert not set(combined_schema) & set(other_data) combined_schema.update(other_data) From f3be1ecdd61db65f23c520ee290bf582fe881bde Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 16:11:35 -0700 Subject: [PATCH 20/70] pass around base uri rather than context manager for finer grained control --- guidance/library/_json.py | 163 ++++++++++++++++++++------------------ 1 file changed, 84 insertions(+), 79 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 790e9734b..c70a8ac8b 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -15,9 +15,14 @@ ) import warnings import referencing -import contextlib from collections import defaultdict -from urllib.parse import urljoin +import urllib.parse + +def urijoin(base: str, uri: str) -> str: + # Special case for fragment-only URIs + if uri.startswith("#"): + return f"{base}{uri}" + return urllib.parse.urljoin(base, uri) try: import jsonschema @@ -131,6 +136,7 @@ class Keyword(str, Enum): ANYOF = "anyOf" ALLOF = "allOf" # Note: Partial support. Only supports exactly one item. ONEOF = "oneOf" # Note: Partial support. This is converted to anyOf. 
+ ID = "$id" REF = "$ref" CONST = "const" ENUM = "enum" @@ -171,7 +177,6 @@ class ObjectKeywords(str, Enum): "$anchor", "$defs", "$schema", - "$id", "id", "$comment", "title", @@ -430,6 +435,7 @@ def ref( lm, *, reference: str, + base_uri: str, ): """ Resolve a reference to another schema and return the grammar for that schema. @@ -438,53 +444,24 @@ def ref( add it to the _defs cache. This allows us to avoid re-resolving the reference every time and to handle recursive references correctly. """ - abspath = self._get_abspath(reference) + abspath = urijoin(base_uri, reference) + if abspath not in self._defs: resolved = self._resolver.lookup(abspath) base_uri_of_resolved = resolved.resolver._base_uri @guidance(stateless=True, dedent=False, cache=True) def closure(lm): - with self._base_uri_context(base_uri_of_resolved): - grammar = self.json(json_schema=resolved.contents) + grammar = self.json(json_schema=resolved.contents, base_uri=base_uri_of_resolved) return lm + grammar self._defs[abspath] = closure return lm + self._defs[abspath]() - def _get_abspath(self, ref): - """ - Convert a reference to an absolute path, resolving it against the base URI if necessary. - This will allow us to get a unique key for each reference and hit the _defs cache correctly. - """ - if ref.startswith("#"): - # Special case for fragment-only references: - # for certain schemes (e.g. urn), urljoin may throw the base URI, but we need to keep them around - return f"{self._base_uri}{ref}" - return urljoin(self._base_uri, ref) - - - @contextlib.contextmanager - def _base_uri_context(self, base_uri: str): - """ - Temporarily replace the base_uri for the duration of the context manager. - This allows refs with different base URIs to be resolved correctly without passing the resolver around. - - Note: very much not thread-safe, but I don't expect instances of this class to be shared between threads. 
- TODO: ensure that the instance's hash depends on the base_uri before adding more caching to this class. - """ - old_base_uri = self._base_uri - self._base_uri = base_uri - try: - yield - finally: - self._base_uri = old_base_uri - - @guidance(stateless=True) def root(self, lm): - return lm + self.json(json_schema=self.schema) + return lm + self.json(json_schema=self.schema, base_uri=self._base_uri) @classmethod @@ -568,6 +545,7 @@ def object( properties: Mapping[str, JSONSchema], additional_properties: JSONSchema, required: Sequence[str], + base_uri: str, ): # "required" keys will be validated against "properties" if they're present, otherwise against "additionalProperties". # If "additionalProperties" is False, then required keys must be in "properties". @@ -588,7 +566,7 @@ def object( # Identify if the key is required required_items.append(name in required) # Build the grammar we'll use for this property - grammars.append(f'{key}{self.key_separator}' + self.json(json_schema=properties.get(name, additional_properties))) + grammars.append(f'{key}{self.key_separator}' + self.json(json_schema=properties.get(name, additional_properties), base_uri=base_uri)) if additional_properties is not False: # Key for additionalProperties is a json string, but we need to disallow any properties that are already defined @@ -604,7 +582,7 @@ def object( else: additional_key_grammar = self.string() - additional_item_grammar = additional_key_grammar + self.key_separator + self.json(json_schema=additional_properties) + additional_item_grammar = additional_key_grammar + self.key_separator + self.json(json_schema=additional_properties, base_uri=base_uri) additional_items_grammar = sequence(additional_item_grammar + self.item_separator) + additional_item_grammar grammars.append(additional_items_grammar) required_items.append(False) @@ -651,6 +629,7 @@ def array( item_schema: JSONSchema, min_items: int, max_items: Optional[int], + base_uri: str, ): if len(prefix_items_schema) < min_items 
and item_schema is False: raise ValueError( @@ -675,7 +654,7 @@ def array( assert i >= min_items break - item = self.json(json_schema=schema) + item = self.json(json_schema=schema, base_uri=base_uri) if i < min_items: required_items.append(item) @@ -684,7 +663,7 @@ def array( if max_items is None and item_schema is not False: # Add an infinite tail of items - item = self.json(json_schema=item_schema) + item = self.json(json_schema=item_schema, base_uri=base_uri) optional_items.append(item + sequence(self.item_separator + item)) lm += "[" @@ -720,8 +699,9 @@ def anyOf( lm, *, anyof_list: Sequence[JSONSchema], + base_uri: str, ): - options = [self.json(json_schema=item) for item in anyof_list] + options = [self.json(json_schema=item, base_uri=base_uri) for item in anyof_list] return lm + select(options) @guidance(stateless=True) @@ -730,11 +710,12 @@ def oneOf( lm, *, oneof_list: Sequence[JSONSchema], + base_uri: str, ): if len(oneof_list) == 1: - return lm + self.json(json_schema=oneof_list[0]) + return lm + self.json(json_schema=oneof_list[0], base_uri=base_uri) warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") - return lm + self.anyOf(anyof_list=oneof_list) + return lm + self.anyOf(anyof_list=oneof_list, base_uri=base_uri) @guidance(stateless=True) def allOf( @@ -742,6 +723,7 @@ def allOf( lm, *, parent_schema: JSONSchema, + base_uri: str, ): type = set(JSONType) properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) @@ -750,23 +732,15 @@ def allOf( items_list: list[JSONSchema] = [] other_data: dict[str, JSONValue] = {} - resolver = self._resolver.lookup(self._base_uri).resolver - - def handle_keyword(key: str, value: JSONValue): + def handle_keyword(key: str, value: JSONValue, base_uri: str): nonlocal type nonlocal required - nonlocal resolver if key == Keyword.REF: - value = cast(str, value) - resolved = resolver.lookup(value) - # Some funky resolver scope to handle here... 
We have to pretend to be the original schema - # TODO: we have a totally separate REF implementation for when we have no sibling keys. Need to refactor. - # TODO: this will probably break if we have a recursive reference in an allOf - old_resolver = resolver - resolver = resolved.resolver - add_schema(resolved.contents) - resolver = old_resolver + ref = cast(str, value) + abspath = urijoin(base_uri, ref) + resolved = self._resolver.lookup(abspath) + add_schema(resolved.contents, base_uri=resolved.resolver._base_uri) elif key == Keyword.TYPE: value = cast(Union[str, Sequence[str]], value) @@ -785,11 +759,16 @@ def handle_keyword(key: str, value: JSONValue): elif key == Keyword.ALLOF: value = cast(Sequence[JSONSchema], value) for schema in value: - add_schema(schema) + add_schema(schema, base_uri) elif key == ObjectKeywords.PROPERTIES: value = cast(Mapping[str, JSONSchema], value) for name, schema in value.items(): + this_base_uri = schema.get(Keyword.ID, base_uri) + if Keyword.REF in schema: + # Make the ref absolute so that it can be resolved in the right scope later + schema = schema.copy() + schema[Keyword.REF] = urijoin(this_base_uri, schema[Keyword.REF]) properties[name].append(schema) elif key == ObjectKeywords.REQUIRED: @@ -816,20 +795,28 @@ def handle_keyword(key: str, value: JSONValue): else: other_data[key] = value - def add_schema(schema: JSONSchema): + def add_schema(schema: JSONSchema, base_uri: str): if schema is True: return if schema is False: raise ValueError("allOf contains a False schema") + # Validate the schema's keys (we have only validated the parent schema's keys so far) # TODO: This will make us validate the parent twice... should probably be refactored validate_json_node_keys(schema) + + # Set the base_uri for this schema + if Keyword.ID in schema: + # TODO: avoid copies if possible..? 
+ schema = schema.copy() + base_uri = urijoin(base_uri, schema.pop(Keyword.ID)) + for key, value in schema.items(): if key in IGNORED_KEYS: continue - handle_keyword(key, value) + handle_keyword(key, value, base_uri) - add_schema(parent_schema) + add_schema(parent_schema, base_uri) combined_schema = { Keyword.TYPE: type, @@ -857,7 +844,7 @@ def add_schema(schema: JSONSchema): assert not set(combined_schema) & set(other_data) combined_schema.update(other_data) - return lm + self.json(json_schema=combined_schema) + return lm + self.json(json_schema=combined_schema, base_uri=base_uri) @guidance(stateless=True) @@ -893,7 +880,8 @@ def const( "properties": {k: {"const": v} for k, v in dict(value).items()}, "required": list(value.keys()), "additionalProperties": False, - } + }, + base_uri="", # dummy value -- we don't need to resolve anything ) if isinstance(value, Sequence): return lm + self.json( @@ -903,7 +891,8 @@ def const( "minItems": len(value), "maxItems": len(value), "items": False, - } + }, + base_uri="", # dummy value -- we don't need to resolve anything ) raise TypeError(f"Unsupported value type: {type(value)} for value: {value!r}") @@ -931,23 +920,26 @@ def enum( def any(self, lm): return lm + select( [ - self.json(json_schema={"type": "null"}), - self.json(json_schema={"type": "boolean"}), - self.json(json_schema={"type": "integer"}), - self.json(json_schema={"type": "number"}), - self.json(json_schema={"type": "string"}), + # Dummy base uris ok since we're not resolving anything + self.json(json_schema={"type": "null"}, base_uri=""), + self.json(json_schema={"type": "boolean"}, base_uri=""), + self.json(json_schema={"type": "integer"}, base_uri=""), + self.json(json_schema={"type": "number"}, base_uri=""), + self.json(json_schema={"type": "string"}, base_uri=""), # Recursive cases self.json( json_schema={ "type": "array", "items": True, }, + base_uri="", ), self.json( json_schema={ "type": "object", "additionalProperties": True, }, + base_uri="", ), ] ) @@ 
-959,6 +951,7 @@ def json( lm, *, json_schema: JSONSchema, + base_uri: str, ): if json_schema is True: json_schema = {} @@ -970,6 +963,10 @@ def json( validate_json_node_keys(json_schema) + if Keyword.ID in json_schema: + # "cd" into the new base_uri + base_uri = urijoin(base_uri, json_schema[Keyword.ID]) + if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: parent_schema = json_schema.copy() anyof_list = parent_schema.pop(Keyword.ANYOF) @@ -983,7 +980,8 @@ def json( for any_item in anyof_list ]} for one_item in oneof_list - ] + ], + base_uri=base_uri, ) if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema: @@ -995,7 +993,8 @@ def json( anyof_list=[ {"allOf": [any_item, *allof_list], **parent_schema} for any_item in anyof_list - ] + ], + base_uri=base_uri, ) if Keyword.ALLOF in json_schema and Keyword.ONEOF in json_schema: @@ -1007,7 +1006,8 @@ def json( oneof_list=[ {"allOf": [one_item, *allof_list], **parent_schema} for one_item in oneof_list - ] + ], + base_uri=base_uri, ) if Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: @@ -1021,16 +1021,17 @@ def json( {"allOf": [one_item, any_item], **parent_schema} for any_item in anyof_list for one_item in oneof_list - ] + ], + base_uri=base_uri, ) if Keyword.ALLOF in json_schema: - return lm + self.allOf(parent_schema=json_schema) + return lm + self.allOf(parent_schema=json_schema, base_uri=base_uri) if Keyword.ANYOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ANYOF) if not sibling_keys: - return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF]) + return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF], base_uri=base_uri) # Let the allOf function handle anyOfs with sibling keys parent_schema = json_schema.copy() anyof_list = parent_schema.pop(Keyword.ANYOF) @@ -1038,13 +1039,14 @@ def json( anyof_list=[ {"allOf": [any_item], **parent_schema} for any_item in anyof_list - ] + ], + base_uri=base_uri, ) if 
Keyword.ONEOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF) if not sibling_keys: - return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF]) + return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF], base_uri=base_uri) # Let the allOf function handle oneOfs with sibling keys parent_schema = json_schema.copy() oneof_list = parent_schema.pop(Keyword.ONEOF) @@ -1053,18 +1055,19 @@ def json( oneof_list=[ {"allOf": [one_item], **parent_schema} for one_item in oneof_list - ] + ], + base_uri=base_uri, ) if Keyword.REF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.REF) if not sibling_keys: - return lm + self.ref(reference=json_schema[Keyword.REF]) + return lm + self.ref(reference=json_schema[Keyword.REF], base_uri=base_uri) # Let the allOf function handle refs with sibling keys parent_schema = json_schema.copy() ref = parent_schema.pop(Keyword.REF) assert Keyword.ALLOF not in parent_schema - return lm + self.allOf(parent_schema={"allOf": [{Keyword.REF: ref}], **parent_schema}) + return lm + self.allOf(parent_schema={"allOf": [{Keyword.REF: ref}], **parent_schema}, base_uri=base_uri) if Keyword.CONST in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM} @@ -1139,12 +1142,14 @@ def json( item_schema=json_schema.get(ArrayKeywords.ITEMS, True), min_items=json_schema.get(ArrayKeywords.MIN_ITEMS, 0), max_items=json_schema.get(ArrayKeywords.MAX_ITEMS, None), + base_uri=base_uri, ) elif target_type == JSONType.OBJECT: option = self.object( properties=json_schema.get(ObjectKeywords.PROPERTIES, {}), additional_properties=json_schema.get(ObjectKeywords.ADDITIONAL_PROPERTIES, True), required=json_schema.get(ObjectKeywords.REQUIRED, set()), + base_uri=base_uri, ) else: raise ValueError(f"Unsupported type in schema: {target_type}") From 2732bd7c79e6bab9cf1e654b11ecf721de8f4007 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 16:33:12 -0700 Subject: [PATCH 
21/70] reorder properties in test cases to be consistent with the order we validate (arbitrary...) --- tests/unit/library/test_json.py | 12 ++++++------ tests/unit/library/test_json_allOf.py | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index 52672730c..65a915626 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -1555,11 +1555,11 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( ["test_object", "valid"], [ # invalid on inner field - ({"foo": {"bar": 1}, "bar": "a"}, False), + ({"bar": "a", "foo": {"bar": 1}}, False), # invalid on outer field - ({"foo": {"bar": "a"}, "bar": 1}, False), + ({ "bar": 1, "foo": {"bar": "a"}}, False), # valid on both fields - ({"foo": {"bar": "a"}, "bar": "a"}, True), + ({"bar": "a", "foo": {"bar": "a"}, }, True), ], ) def test_refs_with_relative_uris_and_defs(self, test_object, valid): @@ -1587,11 +1587,11 @@ def test_refs_with_relative_uris_and_defs(self, test_object, valid): ["test_object", "valid"], [ # invalid on inner field - ({"foo": {"bar": 1}, "bar": "a"}, False), + ({"bar": "a", "foo": {"bar": 1}}, False), # invalid on outer field - ({"foo": {"bar": "a"}, "bar": 1}, False), + ({"bar": 1, "foo": {"bar": "a"}}, False), # valid on both fields - ({"foo": {"bar": "a"}, "bar": "a"}, True), + ({"bar": "a", "foo": {"bar": "a"}}, True), ], ) def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 6ef14aee3..3544bd643 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -6,18 +6,18 @@ from .test_json import check_match_failure, generate_and_check -class TestDynamicRefs: +class TestAllOf: @pytest.mark.parametrize( ["test_object", "valid"], [ # allOf - ({"foo": "baz", "bar": 2}, True), + ({"bar": 2, "foo": "baz"}, True), 
# mismatch second ({"foo": "baz"}, False), # mismatch first ({"bar": 2}, False), # wrong type - ({"foo": "baz", "bar": "quux"}, False), + ({"bar": "quux", "foo": "baz"}, False), ], ) def test_allOf(self, test_object, valid): @@ -40,13 +40,13 @@ def test_allOf(self, test_object, valid): ["test_object", "valid"], [ # valid - ({"foo": "quux", "bar": 2, "baz": None}, True), + ({"bar": 2, "foo": "quux", "baz": None}, True), # mismatch base schema ({"foo": "quux", "baz": None}, False), # mismatch first allOf ({"bar": 2, "baz": None}, False), # mismatch second allOf - ({"foo": "quux", "bar": 2}, False), + ({"bar": 2, "foo": "quux"}, False), # mismatch both ({"bar": 2}, False), ], From 6bc08aed454331d615c12d51c986a86421cc95f6 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 16:36:06 -0700 Subject: [PATCH 22/70] false schemas --- tests/unit/library/test_json_allOf.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 3544bd643..64ec88d90 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -3,6 +3,7 @@ import pytest from jsonschema import ValidationError, validate +from guidance import json as gen_json from .test_json import check_match_failure, generate_and_check @@ -126,7 +127,9 @@ def test_allOf_with_boolean_schemas_some_false(self, test_object, valid): else: with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "allOf contains a False schema" @pytest.mark.parametrize( ["test_object", "valid"], @@ -146,7 +149,9 @@ def test_allOf_with_boolean_schemas_all_false(self, test_object, valid): else: with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "allOf contains a False schema" @pytest.mark.parametrize( ["test_object", "valid"], From a0b1c12839860cc839591f1f9fca88189295e80d Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 17:04:13 -0700 Subject: [PATCH 23/70] enum and const --- guidance/library/_json.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index c70a8ac8b..cc76ecc10 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -42,6 +42,11 @@ def urijoin(base: str, uri: str) -> str: JSONValue = Union[None, bool, int, float, str, Mapping[str, "JSONValue"], Sequence["JSONValue"]] JSONSchema = Union[bool, Mapping[str, JSONValue]] +class Unset(Enum): + # https://peps.python.org/pep-0484/#support-for-singleton-types-in-unions + token = 0 +_unset = Unset.token + DRAFT202012_RESERVED_KEYWORDS = { # Anchors and References '$anchor', @@ -731,10 +736,14 @@ def allOf( additional_properties_list: list[JSONSchema] = [] items_list: list[JSONSchema] = [] other_data: dict[str, JSONValue] = {} + enum: Optional[list[JSONValue]] = None + const: Union[Unset, JSONValue] = _unset def handle_keyword(key: str, value: JSONValue, base_uri: str): nonlocal type nonlocal required + nonlocal const + nonlocal enum if key == Keyword.REF: ref = cast(str, value) @@ -742,6 +751,26 @@ def handle_keyword(key: str, value: JSONValue, base_uri: str): resolved = self._resolver.lookup(abspath) add_schema(resolved.contents, base_uri=resolved.resolver._base_uri) + elif key == Keyword.CONST: + value = cast(JSONValue, value) + if const is not _unset and const != value: + raise ValueError(f"allOf with multiple conflicting const values: {const!r} and {value!r}") + const = value + + elif key == Keyword.ENUM: + value = 
cast(Sequence[JSONValue], value) + if enum is not None: + try: + enum = list(set(enum) & set(value)) + except TypeError: + # Check on equality, not on hash + # Yes, this is O(n^2). + # Hope the items were unique. + # ¯\_(ツ)_/¯ + enum = [a for a in enum if a == b for b in value] + else: + enum = value + elif key == Keyword.TYPE: value = cast(Union[str, Sequence[str]], value) if isinstance(value, str): @@ -819,7 +848,7 @@ def add_schema(schema: JSONSchema, base_uri: str): add_schema(parent_schema, base_uri) combined_schema = { - Keyword.TYPE: type, + Keyword.TYPE: list(type), } if properties: combined_schema[ObjectKeywords.PROPERTIES] = {} @@ -840,6 +869,10 @@ def add_schema(schema: JSONSchema, base_uri: str): combined_schema[ArrayKeywords.ITEMS] = items_list[0] else: combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} + if enum is not None: + combined_schema[Keyword.ENUM] = enum + if const is not _unset: + combined_schema[Keyword.CONST] = const assert not set(combined_schema) & set(other_data) combined_schema.update(other_data) From 4505171074301b7a486a1a96343f5c915155f7cf Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 17:04:54 -0700 Subject: [PATCH 24/70] modify test to use enum instead of multipleOf (which we don't have an implementation of) --- tests/unit/library/test_json_allOf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 64ec88d90..ebf8fcc25 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -277,9 +277,9 @@ def test_nested_allOf_to_check_validation_semantics(self, test_object, valid): def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", - "allOf": [{"multipleOf": 2}], - "anyOf": [{"multipleOf": 3}], - "oneOf": [{"multipleOf": 5}], + "allOf": [{"enum": [2, 6, 10, 30]}], + "anyOf": [{"enum": 
[3, 6, 15, 30]}], + "oneOf": [{"enum": [5, 10, 15, 30]}], } if valid: validate(instance=test_object, schema=schema) From 7d0b576e8316eb83a20cd4c9c63cce1390bf7939 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 17:07:50 -0700 Subject: [PATCH 25/70] remove the ternary implementation since the union of all the binary ones cover it --- guidance/library/_json.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index cc76ecc10..ecb600580 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1000,23 +1000,6 @@ def json( # "cd" into the new base_uri base_uri = urijoin(base_uri, json_schema[Keyword.ID]) - if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: - parent_schema = json_schema.copy() - anyof_list = parent_schema.pop(Keyword.ANYOF) - allof_list = parent_schema.pop(Keyword.ALLOF) - oneof_list = parent_schema.pop(Keyword.ONEOF) - # Reduce the problem to a oneOf of anyOfs of allOfs - return lm + self.oneOf( - oneof_list=[ - {"anyOf": [ - {"allOf": [one_item, any_item, *allof_list], **parent_schema} - for any_item in anyof_list - ]} - for one_item in oneof_list - ], - base_uri=base_uri, - ) - if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema: parent_schema = json_schema.copy() anyof_list = parent_schema.pop(Keyword.ANYOF) From c5ed6b3f185d302415dfb28b57705b6086a6d259 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 17:21:52 -0700 Subject: [PATCH 26/70] make mypy less sad --- guidance/library/_json.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index ecb600580..a270d23dd 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -39,8 +39,7 @@ def urijoin(base: str, uri: str) -> str: from ._pydantic import pydantic_to_json_schema from ._subgrammar import 
as_regular_grammar, lexeme, subgrammar -JSONValue = Union[None, bool, int, float, str, Mapping[str, "JSONValue"], Sequence["JSONValue"]] -JSONSchema = Union[bool, Mapping[str, JSONValue]] +JSONSchema = Union[bool, dict[str, Any]] class Unset(Enum): # https://peps.python.org/pep-0484/#support-for-singleton-types-in-unions @@ -735,11 +734,11 @@ def allOf( required: set[str] = set() additional_properties_list: list[JSONSchema] = [] items_list: list[JSONSchema] = [] - other_data: dict[str, JSONValue] = {} - enum: Optional[list[JSONValue]] = None - const: Union[Unset, JSONValue] = _unset + other_data: dict[str, Any] = {} + enum: Optional[list[Any]] = None + const: Union[Unset, Any] = _unset - def handle_keyword(key: str, value: JSONValue, base_uri: str): + def handle_keyword(key: str, value: Any, base_uri: str): nonlocal type nonlocal required nonlocal const @@ -752,13 +751,12 @@ def handle_keyword(key: str, value: JSONValue, base_uri: str): add_schema(resolved.contents, base_uri=resolved.resolver._base_uri) elif key == Keyword.CONST: - value = cast(JSONValue, value) if const is not _unset and const != value: raise ValueError(f"allOf with multiple conflicting const values: {const!r} and {value!r}") const = value elif key == Keyword.ENUM: - value = cast(Sequence[JSONValue], value) + value = cast(list[Any], value) if enum is not None: try: enum = list(set(enum) & set(value)) @@ -767,12 +765,12 @@ def handle_keyword(key: str, value: JSONValue, base_uri: str): # Yes, this is O(n^2). # Hope the items were unique. 
# ¯\_(ツ)_/¯ - enum = [a for a in enum if a == b for b in value] + enum = [a for a in enum for b in value if a == b] else: enum = value elif key == Keyword.TYPE: - value = cast(Union[str, Sequence[str]], value) + value = cast(Union[str, list[str]], value) if isinstance(value, str): value_set = {value} else: @@ -791,13 +789,14 @@ def handle_keyword(key: str, value: JSONValue, base_uri: str): add_schema(schema, base_uri) elif key == ObjectKeywords.PROPERTIES: - value = cast(Mapping[str, JSONSchema], value) + value = cast(dict[str, JSONSchema], value) for name, schema in value.items(): - this_base_uri = schema.get(Keyword.ID, base_uri) - if Keyword.REF in schema: - # Make the ref absolute so that it can be resolved in the right scope later - schema = schema.copy() - schema[Keyword.REF] = urijoin(this_base_uri, schema[Keyword.REF]) + if isinstance(schema, dict): + this_base_uri = schema.get(Keyword.ID, base_uri) + if Keyword.REF in schema: + # Make the ref absolute so that it can be resolved in the right scope later + schema = schema.copy() + schema[Keyword.REF] = urijoin(this_base_uri, schema[Keyword.REF]) properties[name].append(schema) elif key == ObjectKeywords.REQUIRED: @@ -847,7 +846,7 @@ def add_schema(schema: JSONSchema, base_uri: str): add_schema(parent_schema, base_uri) - combined_schema = { + combined_schema: dict[str, Any] = { Keyword.TYPE: list(type), } if properties: From a8cdd2a26607c9a9f40fd44167dad9d826a7c1cf Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 12:47:34 -0700 Subject: [PATCH 27/70] apply additionalProperties correctly in allOf --- guidance/library/_json.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index a270d23dd..26f916109 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -732,13 +732,13 @@ def allOf( type = set(JSONType) properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) 
required: set[str] = set() - additional_properties_list: list[JSONSchema] = [] + additional_properties_list: list[tuple[JSONSchema, set[str]]] = [] items_list: list[JSONSchema] = [] other_data: dict[str, Any] = {} enum: Optional[list[Any]] = None const: Union[Unset, Any] = _unset - def handle_keyword(key: str, value: Any, base_uri: str): + def handle_keyword(key: str, value: Any, parent_schema: JSONSchema, base_uri: str): nonlocal type nonlocal required nonlocal const @@ -804,10 +804,16 @@ def handle_keyword(key: str, value: Any, base_uri: str): required |= set(value) elif key == ObjectKeywords.ADDITIONAL_PROPERTIES: - # TODO: do the additionalProperties of one schema need to evaluate against the properties of another? # TODO: unevaluatedProperties? value = cast(JSONSchema, value) - additional_properties_list.append(value) + # We need to keep track of which properties are exempt from this additionalProperties schema, + # i.e. the ones defined in the parent schema + exempt_properties: set[str] = set() + if ObjectKeywords.PROPERTIES in parent_schema: + exempt_properties = set(parent_schema[ObjectKeywords.PROPERTIES]) + additional_properties_list.append( + (value, exempt_properties) + ) elif key == ArrayKeywords.ITEMS: value = cast(JSONSchema, value) @@ -842,13 +848,20 @@ def add_schema(schema: JSONSchema, base_uri: str): for key, value in schema.items(): if key in IGNORED_KEYS: continue - handle_keyword(key, value, base_uri) + handle_keyword(key, value, schema, base_uri) add_schema(parent_schema, base_uri) combined_schema: dict[str, Any] = { Keyword.TYPE: list(type), } + + # Post-process additional_properties to make sure we apply the additional properties of one + # schema to the properties of another schema + for additional_schema, exempt_properties in additional_properties_list: + for name in set(properties) - exempt_properties: + properties[name].append(additional_schema) + if properties: combined_schema[ObjectKeywords.PROPERTIES] = {} for name, schemas in 
properties.items(): @@ -860,9 +873,9 @@ def add_schema(schema: JSONSchema, base_uri: str): combined_schema[ObjectKeywords.REQUIRED] = required if additional_properties_list: if len(additional_properties_list) == 1: - combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = additional_properties_list[0] + combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES], _ = additional_properties_list[0] else: - combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": additional_properties_list} + combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": [schema for schema, _ in additional_properties_list]} if items_list: if len(items_list) == 1: combined_schema[ArrayKeywords.ITEMS] = items_list[0] From 985d38e1a0dac4234b9a5424e16071884f2c093f Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 13:29:03 -0700 Subject: [PATCH 28/70] tests for additionalProperties in allOf --- tests/unit/library/test_json_allOf.py | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index ebf8fcc25..23b89e2ec 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -288,3 +288,60 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + "test_object, valid", + [ + # valid: foo is integer and less than 4, bar is equal to 5, baz is integer greater than 5 + ({"foo": 0, "bar": 5, "baz": 10}, True), + # valid: foo is null, bar is equal to 5, baz is null + ({"foo": None, "bar": 5, "baz": None}, True), + # valid: foo is integer and less than 4, bar is non-number, baz is integer greater than 5 + ({"foo": 0, "bar": "quxx", "baz": 10}, True), + # invalid: foo is integer and greater than 4 + ({"foo": 5, "bar": 5, "baz": 10}, False), + 
# invalid: foo is not an integer or None + ({"foo": "quxx", "bar": 5, "baz": 10}, False), + # invalid: bar is greater than 5 + ({"foo": 0, "bar": 6, "baz": 10}, False), + # invalid: bar is less than 5 + ({"foo": 0, "bar": 4, "baz": 10}, False), + # invalid: baz is less than 5 + ({"foo": 0, "bar": 5, "baz": 4}, False), + # invalid: baz is not an integer or null + ({"foo": 0, "bar": 5, "baz": "quxx"}, False), + ] + ) + @pytest.mark.parametrize( + "schema", + [ + # The following are equivalent to this: + { + "properties": {"foo": {"type": ["integer", "null"], "maximum": 4}, "bar": {"minimum": 5, "maximum": 5}}, + "additionalProperties": {"type": ["integer", "null"], "minimum": 5} + }, + # additionalProperties in parent schema + { + "allOf": [ + {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}} + ], + "properties": {"bar": {"maximum": 5}}, + "additionalProperties": {"type": ["integer", "null"]} + }, + # additionalProperties in allOf + { + "allOf": [ + {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}}, + {"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}} + ] + }, + ] + ) + def test_additionalProperties_in_allOf(self, schema, test_object, valid): + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) From 60bf51b1ffd996858d4f9cf5b308e23289b339bc Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 13:41:09 -0700 Subject: [PATCH 29/70] add (xfailed) test for inconsistent additionalProperties values in allOf --- tests/unit/library/test_json_allOf.py | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 23b89e2ec..4fd860a72 100644 --- 
a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -345,3 +345,36 @@ def test_additionalProperties_in_allOf(self, schema, test_object, valid): with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + "test_object, valid", + [ + ({}, True), # empty object is valid + ({"foo": 1}, False), # foo is not a string + ({"foo": "x"}, False), # foo is not an integer + ({"foo": True}, False), # foo is not a string or an integer + ] + ) + def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): + schema = { + "type": "object", + "allOf": [ + {"additionalProperties": {"type": "integer"}}, + {"additionalProperties": {"type": "string"}} + ] + } + try: + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + except ValueError as ve: + if ve.args[0] == "allOf with conflicting types": + pytest.xfail( + reason="We should be returning a False schema from allOf if there is a conflict, but we currently raise an error" + ) + else: + raise From dbec4593b84674b2d431a19efc2a4d0716148735 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 14:14:48 -0700 Subject: [PATCH 30/70] mypy --- guidance/library/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 26f916109..747773445 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -738,7 +738,7 @@ def allOf( enum: Optional[list[Any]] = None const: Union[Unset, Any] = _unset - def handle_keyword(key: str, value: Any, parent_schema: JSONSchema, base_uri: str): + def handle_keyword(key: str, value: Any, parent_schema: dict[str, 
Any], base_uri: str):
             nonlocal type
             nonlocal required
             nonlocal const

From 4e688bd3e2f2b0fe1a4e78831d7faf1a39a5c9b4 Mon Sep 17 00:00:00 2001
From: Hudson Cooper
Date: Thu, 31 Oct 2024 16:49:50 -0700
Subject: [PATCH 31/70] prefixItems and items

---
 guidance/library/_json.py | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/guidance/library/_json.py b/guidance/library/_json.py
index 747773445..52b2a9fe3 100644
--- a/guidance/library/_json.py
+++ b/guidance/library/_json.py
@@ -733,7 +733,8 @@ def allOf(
         properties: defaultdict[str, list[JSONSchema]] = defaultdict(list)
         required: set[str] = set()
         additional_properties_list: list[tuple[JSONSchema, set[str]]] = []
-        items_list: list[JSONSchema] = []
+        prefix_items: defaultdict[int, list[JSONSchema]] = defaultdict(list)
+        items_list: list[tuple[JSONSchema, set[int]]] = []
         other_data: dict[str, Any] = {}
         enum: Optional[list[Any]] = None
         const: Union[Unset, Any] = _unset
@@ -815,9 +816,22 @@ def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri
                     (value, exempt_properties)
                 )
 
+            elif key == ArrayKeywords.PREFIX_ITEMS:
+                value = cast(Sequence[JSONSchema], value)
+                for i, schema in enumerate(value):
+                    prefix_items[i].append(schema)
+
             elif key == ArrayKeywords.ITEMS:
+                # TODO: unevaluatedItems?
                 value = cast(JSONSchema, value)
-                items_list.append(value)
+                # We need to keep track of which prefixItems are exempt from this `items` schema,
+                # i.e. the ones defined in the parent schema
+                exempt_prefix_items: set[int] = set()
+                if ArrayKeywords.PREFIX_ITEMS in parent_schema:
+                    exempt_prefix_items = set(range(len(parent_schema[ArrayKeywords.PREFIX_ITEMS])))
+                items_list.append(
+                    (value, exempt_prefix_items)
+                )
 
             elif key in set(Keyword):
                 # If we've done our job right, we should never hit this case...
@@ -862,6 +876,11 @@ def add_schema(schema: JSONSchema, base_uri: str): for name in set(properties) - exempt_properties: properties[name].append(additional_schema) + # Post-process items to make sure we apply the additional items of one schema to the prefix items of another schema + for additional_schema, exempt_prefix_items in items_list: + for i in set(prefix_items) - exempt_prefix_items: + prefix_items[i].append(additional_schema) + if properties: combined_schema[ObjectKeywords.PROPERTIES] = {} for name, schemas in properties.items(): @@ -876,11 +895,19 @@ def add_schema(schema: JSONSchema, base_uri: str): combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES], _ = additional_properties_list[0] else: combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": [schema for schema, _ in additional_properties_list]} + if prefix_items: + combined_schema[ArrayKeywords.PREFIX_ITEMS] = [] + for i in range(len(prefix_items)): + schemas = prefix_items[i] + if len(schemas) == 1: + combined_schema[ArrayKeywords.PREFIX_ITEMS].append(schemas[0]) + else: + combined_schema[ArrayKeywords.PREFIX_ITEMS].append({"allOf": schemas}) if items_list: if len(items_list) == 1: - combined_schema[ArrayKeywords.ITEMS] = items_list[0] + combined_schema[ArrayKeywords.ITEMS], _ = items_list[0] else: - combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} + combined_schema[ArrayKeywords.ITEMS] = {"allOf": [schema for schema, _ in items_list]} if enum is not None: combined_schema[Keyword.ENUM] = enum if const is not _unset: From f71cf9197e16e54f75a9c8bdea491f92d8a57749 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 16:50:07 -0700 Subject: [PATCH 32/70] some simple reduction ops --- guidance/library/_json.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 52b2a9fe3..9846f2f06 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -838,7 +838,18 @@ def 
handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri raise NotImplementedError(f"Don't yet know how to handle {key} in allOf") elif key in other_data: - raise NotImplementedError(f"Don't yet know how to reduce multiple values of {key!r} in allOf") + if key in { + NumberKeywords.MINIMUM, NumberKeywords.EXCLUSIVE_MINIMUM, + StringKeywords.MIN_LENGTH, ArrayKeywords.MIN_ITEMS + }: + other_data[key] = max(other_data[key], value) + elif key in { + NumberKeywords.MAXIMUM, NumberKeywords.EXCLUSIVE_MAXIMUM, + StringKeywords.MAX_LENGTH, ArrayKeywords.MAX_ITEMS + }: + other_data[key] = min(other_data[key], value) + else: + raise NotImplementedError(f"Don't yet know how to reduce multiple values of {key!r} in allOf") else: other_data[key] = value From cf765fa21d192b6d76a733aa9448e2d49b4e8a46 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 16:53:20 -0700 Subject: [PATCH 33/70] deterministic order of required properties --- guidance/library/_json.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 9846f2f06..cb6ea3aff 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -731,7 +731,7 @@ def allOf( ): type = set(JSONType) properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) - required: set[str] = set() + required: dict[str, None] = dict() # use a dict for ordered-set behavior additional_properties_list: list[tuple[JSONSchema, set[str]]] = [] prefix_items: defaultdict[int, list[JSONSchema]] = defaultdict(list) items_list: list[tuple[JSONSchema, set[int]]] = [] @@ -802,7 +802,7 @@ def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri elif key == ObjectKeywords.REQUIRED: value = cast(Sequence[str], value) - required |= set(value) + required.update({name: None for name in value}) elif key == ObjectKeywords.ADDITIONAL_PROPERTIES: # TODO: unevaluatedProperties? 
@@ -900,7 +900,7 @@ def add_schema(schema: JSONSchema, base_uri: str): else: combined_schema[ObjectKeywords.PROPERTIES][name] = {"allOf": schemas} if required: - combined_schema[ObjectKeywords.REQUIRED] = required + combined_schema[ObjectKeywords.REQUIRED] = list(required.keys()) if additional_properties_list: if len(additional_properties_list) == 1: combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES], _ = additional_properties_list[0] From a656c4534cd4b6087e4ba9dda192fe7b4b229bcc Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 17:23:19 -0700 Subject: [PATCH 34/70] add tests for prefixItems and items --- tests/unit/library/test_json_allOf.py | 58 +++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 4fd860a72..318fe7cb7 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -378,3 +378,61 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): ) else: raise + + @pytest.mark.parametrize( + "test_object, valid", + [ + # valid: foo is integer and less than 4, bar is equal to 5, baz is integer greater than 5 + ([0, 5, 10], True), + # valid: foo is null, bar is equal to 5, baz is null + ([None, 5, None], True), + # valid: foo is integer and less than 4, bar is non-number, baz is integer greater than 5 + ([0, "quxx", 10], True), + # invalid: foo is integer and greater than 4 + ([5, 5, 10], False), + # invalid: foo is not an integer or None + (["quxx", 5, 10], False), + # invalid: bar is greater than 5 + ([0, 6, 10], False), + # invalid: bar is less than 5 + ([0, 4, 10], False), + # invalid: baz is less than 5 + ([0, 5, 4], False), + # invalid: baz is not an integer or null + ([0, 5, "quxx"], False), + ] + ) + @pytest.mark.parametrize( + "schema", + [ + # The following are equivalent to this: + { + "prefixItems": [{"type": ["integer", "null"], "maximum": 4}, {"minimum": 5, 
"maximum": 5}], + "items": {"type": ["integer", "null"], "minimum": 5} + }, + # items in parent schema + { + "allOf": [ + {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, + ], + "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], + "items": {"type": ["integer", "null"]} + + }, + # items in allOf + { + "allOf": [ + {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, + {"prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], "items": {"type": ["integer", "null"]}} + ] + }, + ] + ) + def test_items_and_prefixitems_in_allOf(self, schema, test_object, valid): + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) From 1cf39103c5fd8054d0fae5932ae41f3144e586e7 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 17:27:15 -0700 Subject: [PATCH 35/70] test for two minimums or maximums --- tests/unit/library/test_json_allOf.py | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 318fe7cb7..d6152a664 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -92,6 +92,51 @@ def test_allOf_simple_types(self, test_object, valid): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # mismatch one + (25, False), + # valid + (35, True), + ], + ) + def test_allOf_simple_minimum(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"minimum": 30}, {"minimum": 20}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + 
with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # mismatch one + (25, False), + # valid + (15, True), + ], + ) + def test_allOf_simple_maximum(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"maximum": 30}, {"maximum": 20}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( ["test_object", "valid"], [ From dd0f2a459e24d413bc76333df62e490e785a7236 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 09:45:23 -0700 Subject: [PATCH 36/70] refactor json tests into multiple files --- tests/unit/library/json/__init__.py | 0 .../test_allOf.py} | 5 +- tests/unit/library/{ => json}/test_json.py | 1029 +---------------- tests/unit/library/json/test_refs.py | 978 ++++++++++++++++ .../test_string_format.py} | 4 +- tests/unit/library/json/utils.py | 58 + 6 files changed, 1046 insertions(+), 1028 deletions(-) create mode 100644 tests/unit/library/json/__init__.py rename tests/unit/library/{test_json_allOf.py => json/test_allOf.py} (98%) rename tests/unit/library/{ => json}/test_json.py (65%) create mode 100644 tests/unit/library/json/test_refs.py rename tests/unit/library/{test_json_stringformat.py => json/test_string_format.py} (99%) create mode 100644 tests/unit/library/json/utils.py diff --git a/tests/unit/library/json/__init__.py b/tests/unit/library/json/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/json/test_allOf.py similarity index 98% rename from 
tests/unit/library/test_json_allOf.py rename to tests/unit/library/json/test_allOf.py index d6152a664..74aa539b6 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -1,10 +1,13 @@ +"""Adapted from https://github.com/json-schema-org/JSON-Schema-Test-Suite/tree/9fc880bfb6d8ccd093bc82431f17d13681ffae8e/tests/draft2020-12/allOf.json""" + from json import dumps as json_dumps import pytest from jsonschema import ValidationError, validate from guidance import json as gen_json -from .test_json import check_match_failure, generate_and_check +from .utils import generate_and_check +from .utils import check_match_failure class TestAllOf: diff --git a/tests/unit/library/test_json.py b/tests/unit/library/json/test_json.py similarity index 65% rename from tests/unit/library/test_json.py rename to tests/unit/library/json/test_json.py index 65a915626..d94a98b69 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1,65 +1,14 @@ import json -from functools import partial -from typing import Any, Set, Union, Optional import pytest from jsonschema import validate, ValidationError -from json import dumps as json_dumps, loads as json_loads +from json import dumps as json_dumps from guidance import json as gen_json from guidance import models -from guidance.library._json import IGNORED_KEYS, JSONSchema - -from ...utils import check_match_failure as _check_match_failure -from ...utils import check_run_with_temperature -from ...utils import generate_and_check as _generate_and_check - - -def generate_and_check( - target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None -): - if isinstance(schema_obj, str): - schema_obj = json_loads(schema_obj) - - # Sanity check what we're being asked - validate(instance=target_obj, schema=schema_obj) - prepared_json = json_dumps(target_obj) - assert json.loads(prepared_json) == target_obj - - # Now test that the grammar can recognize 
and generate prepared_json - # We partial in the grammar_callable - if desired_temperature is not None: - grammar_callable = partial( - gen_json, schema=schema_obj, temperature=desired_temperature - ) - else: - grammar_callable = partial(gen_json, schema=schema_obj) - - lm = _generate_and_check( - grammar_callable, - test_string=prepared_json, - ) - check_run_with_temperature(lm, desired_temperature) - - -def check_match_failure( - *, - bad_string: str, - good_bytes: Optional[bytes] = None, - failure_byte: Optional[bytes] = None, - allowed_bytes: Optional[Set[bytes]] = None, - schema_obj: Union[str, JSONSchema], -): - grammar = gen_json(schema=schema_obj) - - _check_match_failure( - bad_string=bad_string, - good_bytes=good_bytes, - failure_byte=failure_byte, - allowed_bytes=allowed_bytes, - grammar=grammar, - ) +from guidance.library._json import IGNORED_KEYS +from .utils import check_match_failure, generate_and_check # Common sets of allowed_bytes @@ -1124,978 +1073,6 @@ def test_bad_with_items( ) -class TestRefs: - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # match - ({"foo": False}, True), - # recursive match - ({"foo": {"foo": False}}, True), - # mismatch - ({"bar": False}, False), - # recursive mismatch - ({"foo": {"bar": False}}, False), - ], - ) - def test_root_pointer_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "properties": {"foo": {"$ref": "#"}}, - "additionalProperties": False, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # match - ({"bar": 3}, True), - # mismatch - ({"bar": True}, False), - ], - ) - def test_relative_pointer_ref_to_object(self, test_object, valid): - schema = { - 
"$schema": "https://json-schema.org/draft/2020-12/schema", - "properties": {"foo": {"type": "integer"}, "bar": {"$ref": "#/properties/foo"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # match array - ([1, 2], True), - # mismatch array - ([1, "foo"], False), - ], - ) - def test_relative_pointer_ref_to_array(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "prefixItems": [{"type": "integer"}, {"$ref": "#/prefixItems/0"}], - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # slash invalid - ({"slash": "aoeu"}, False), - # tilde invalid - ({"tilde": "aoeu"}, False), - # percent invalid - ({"percent": "aoeu"}, False), - # slash valid - ({"slash": 123}, True), - # tilde valid - ({"tilde": 123}, True), - # percent valid - ({"percent": 123}, True), - ], - ) - def test_escaped_pointer_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": { - "tilde~field": {"type": "integer"}, - "slash/field": {"type": "integer"}, - "percent%field": {"type": "integer"}, - }, - "properties": { - "tilde": {"$ref": "#/$defs/tilde~0field"}, - "slash": {"$ref": "#/$defs/slash~1field"}, - "percent": {"$ref": "#/$defs/percent%25field"}, - }, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - 
validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # nested ref valid - (5, True), - # nested ref invalid - ("a", False), - ], - ) - def test_nested_refs(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": { - "a": {"type": "integer"}, - "b": {"$ref": "#/$defs/a"}, - "c": {"$ref": "#/$defs/b"}, - }, - "$ref": "#/$defs/c", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # ref valid, maxItems valid - ({"foo": []}, True), - # ref valid, maxItems invalid - ({"foo": [1, 2, 3]}, False), - # ref invalid - ({"foo": "string"}, False), - ], - ) - def test_ref_applies_alongside_sibling_keywords(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": {"reffed": {"type": "array"}}, - "properties": {"foo": {"$ref": "#/$defs/reffed", "maxItems": 2}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # remote ref valid - ({"minLength": 1}, True), - # remote ref invalid - ({"minLength": -1}, False), - ], - ) - @pytest.mark.xfail(reason="Remote refs are not supported") - def test_remote_ref_containing_refs_itself(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": 
"https://json-schema.org/draft/2020-12/schema", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # property named $ref valid - ({"$ref": "a"}, True), - # property named $ref invalid - ({"$ref": 2}, False), - ], - ) - def test_property_named_ref_that_is_not_a_reference(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "properties": {"$ref": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # property named $ref valid - ({"$ref": "a"}, True), - # property named $ref invalid - ({"$ref": 2}, False), - ], - ) - def test_property_named_ref_containing_an_actual_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "properties": {"$ref": {"$ref": "#/$defs/is-string"}}, - "$defs": {"is-string": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # any value is valid - ("foo", True) - ], - ) - def test_ref_to_boolean_schema_true(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "#/$defs/bool", - "$defs": 
{"bool": True}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # any value is invalid - ("foo", False) - ], - ) - @pytest.mark.xfail(reason="false schema is not implemented") - def test_ref_to_boolean_schema_false(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "#/$defs/bool", - "$defs": {"bool": False}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # valid tree - ( - { - "meta": "root", - "nodes": [ - { - "value": 1, - "subtree": { - "meta": "child", - "nodes": [{"value": 1.1}, {"value": 1.2}], - }, - }, - { - "value": 2, - "subtree": { - "meta": "child", - "nodes": [{"value": 2.1}, {"value": 2.2}], - }, - }, - ], - }, - True, - ), - # invalid tree - ( - { - "meta": "root", - "nodes": [ - { - "value": 1, - "subtree": { - "meta": "child", - "nodes": [{"value": "string is invalid"}, {"value": 1.2}], - }, - }, - { - "value": 2, - "subtree": { - "meta": "child", - "nodes": [{"value": 2.1}, {"value": 2.2}], - }, - }, - ], - }, - False, - ), - ], - ) - def test_Recursive_references_between_schemas(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://localhost:1234/draft2020-12/tree", - "description": "tree of nodes", - "type": "object", - "properties": { - "meta": {"type": "string"}, - "nodes": {"type": "array", "items": {"$ref": "node"}}, - }, - "required": 
["meta", "nodes"], - "$defs": { - "node": { - "$id": "http://localhost:1234/draft2020-12/node", - "description": "node", - "type": "object", - "properties": {"value": {"type": "number"}, "subtree": {"$ref": "tree"}}, - "required": ["value"], - } - }, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # object with numbers is valid - ({'foo"bar': 1}, True), - # object with strings is invalid - ({'foo"bar': "1"}, False), - ], - ) - def test_refs_with_quote(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "properties": {'foo"bar': {"$ref": "#/$defs/foo%22bar"}}, - "$defs": {'foo"bar': {"type": "number"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # referenced subschema doesn't see annotations from properties - ({"prop1": "match"}, False) - ], - ) - @pytest.mark.xfail(reason="unevaluatedProperties is not implemented") - def test_ref_creates_new_scope_when_adjacent_to_keywords(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": {"A": {"unevaluatedProperties": False}}, - "properties": {"prop1": {"type": "string"}}, - "$ref": "#/$defs/A", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # do not evaluate the $ref inside the enum, matching any string - ("this is a string", False), - # do not evaluate the $ref inside the enum, definition exact match - ({"type": "string"}, False), - # match the enum exactly - ({"$ref": "#/$defs/a_string"}, True), - ], - ) - def test_naive_replacement_of_ref_with_its_destination_is_not_correct( - self, test_object, valid - ): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": {"a_string": {"type": "string"}}, - "enum": [{"$ref": "#/$defs/a_string"}], - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # invalid on inner field - ({"bar": "a", "foo": {"bar": 1}}, False), - # invalid on outer field - ({ "bar": 1, "foo": {"bar": "a"}}, False), - # valid on both fields - ({"bar": "a", "foo": {"bar": "a"}, }, True), - ], - ) - def test_refs_with_relative_uris_and_defs(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://example.com/schema-relative-uri-defs1.json", - "properties": { - "foo": { - "$id": "schema-relative-uri-defs2.json", - "$defs": {"inner": {"properties": {"bar": {"type": "string"}}}}, - "$ref": "#/$defs/inner", - } - }, - "$ref": "schema-relative-uri-defs2.json", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", 
"valid"], - [ - # invalid on inner field - ({"bar": "a", "foo": {"bar": 1}}, False), - # invalid on outer field - ({"bar": 1, "foo": {"bar": "a"}}, False), - # valid on both fields - ({"bar": "a", "foo": {"bar": "a"}}, True), - ], - ) - def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://example.com/schema-refs-absolute-uris-defs1.json", - "properties": { - "foo": { - "$id": "http://example.com/schema-refs-absolute-uris-defs2.json", - "$defs": {"inner": {"properties": {"bar": {"type": "string"}}}}, - "$ref": "#/$defs/inner", - } - }, - "$ref": "schema-refs-absolute-uris-defs2.json", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # number is valid - (1, True), - # non-number is invalid - ("a", False), - ], - ) - def test_id_must_be_resolved_against_nearest_parent_not_just_immediate_parent( - self, test_object, valid - ): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://example.com/a.json", - "$defs": { - "x": { - "$id": "http://example.com/b/c.json", - "not": {"$defs": {"y": {"$id": "d.json", "type": "number"}}}, - } - }, - "allOf": [{"$ref": "http://example.com/b/d.json"}], - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # data is valid against first definition - (5, True), - # data is invalid against first definition - (50, 
False), - ], - ) - def test_order_of_evaluation_id_and_ref(self, test_object, valid): - schema = { - "$comment": "$id must be evaluated before $ref to get the proper $ref destination", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://example.com/draft2020-12/ref-and-id1/base.json", - "$ref": "int.json", - "$defs": { - "bigint": { - "$comment": "canonical uri: https://example.com/ref-and-id1/int.json", - "$id": "int.json", - "maximum": 10, - }, - "smallint": { - "$comment": "canonical uri: https://example.com/ref-and-id1-int.json", - "$id": "/draft2020-12/ref-and-id1-int.json", - "maximum": 2, - }, - }, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # data is valid against first definition - (5, True), - # data is invalid against first definition - (50, False), - ], - ) - def test_order_of_evaluation_id_and_anchor_and_ref(self, test_object, valid): - schema = { - "$comment": "$id must be evaluated before $ref to get the proper $ref destination", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://example.com/draft2020-12/ref-and-id2/base.json", - "$ref": "#bigint", - "$defs": { - "bigint": { - "$comment": "canonical uri: /ref-and-id2/base.json#/$defs/bigint; another valid uri for this location: /ref-and-id2/base.json#bigint", - "$anchor": "bigint", - "maximum": 10, - }, - "smallint": { - "$comment": "canonical uri: https://example.com/ref-and-id2#/$defs/smallint; another valid uri for this location: https://example.com/ref-and-id2/#bigint", - "$id": "https://example.com/draft2020-12/ref-and-id2/", - "$anchor": "bigint", - "maximum": 2, - }, - }, - } - if valid: - validate(instance=test_object, schema=schema) - 
generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # valid under the URN IDed schema - ({"foo": 37}, True), - # invalid under the URN IDed schema - ({"foo": 12}, False), - ], - ) - def test_simple_URN_base_URI_with_ref_via_the_URN(self, test_object, valid): - schema = { - "$comment": "URIs do not have to have HTTP(s) schemes", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:uuid:deadbeef-1234-ffff-ffff-4321feebdaed", - "minimum": 30, - "properties": {"foo": {"$ref": "urn:uuid:deadbeef-1234-ffff-ffff-4321feebdaed"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - ({"foo": 12}, False), - ], - ) - def test_simple_URN_base_URI_with_JSON_pointer(self, test_object, valid): - schema = { - "$comment": "URIs do not have to have HTTP(s) schemes", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:uuid:deadbeef-1234-00ff-ff00-4321feebdaed", - "properties": {"foo": {"$ref": "#/$defs/bar"}}, - "$defs": {"bar": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - 
({"foo": 12}, False), - ], - ) - def test_URN_base_URI_with_NSS(self, test_object, valid): - schema = { - "$comment": "RFC 8141 §2.2", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:example:1/406/47452/2", - "properties": {"foo": {"$ref": "#/$defs/bar"}}, - "$defs": {"bar": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - ({"foo": 12}, False), - ], - ) - def test_URN_base_URI_with_r_component(self, test_object, valid): - schema = { - "$comment": "RFC 8141 §2.3.1", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:example:foo-bar-baz-qux?+CCResolve:cc=uk", - "properties": {"foo": {"$ref": "#/$defs/bar"}}, - "$defs": {"bar": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - ({"foo": 12}, False), - ], - ) - def test_URN_base_URI_with_q_component(self, test_object, valid): - schema = { - "$comment": "RFC 8141 §2.3.2", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:example:weather?=op=map&lat=39.56&lon=-104.85&datetime=1969-07-21T02:56:15Z", - "properties": {"foo": {"$ref": "#/$defs/bar"}}, - "$defs": {"bar": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, 
schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - ({"foo": 12}, False), - ], - ) - def test_URN_base_URI_with_URN_and_JSON_pointer_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:uuid:deadbeef-1234-0000-0000-4321feebdaed", - "properties": { - "foo": {"$ref": "urn:uuid:deadbeef-1234-0000-0000-4321feebdaed#/$defs/bar"} - }, - "$defs": {"bar": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - ({"foo": 12}, False), - ], - ) - def test_URN_base_URI_with_URN_and_anchor_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:uuid:deadbeef-1234-ff00-00ff-4321feebdaed", - "properties": { - "foo": {"$ref": "urn:uuid:deadbeef-1234-ff00-00ff-4321feebdaed#something"} - }, - "$defs": {"bar": {"$anchor": "something", "type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ("bar", True), - # a non-string is invalid - (12, False), - ], - ) - def 
test_URN_ref_with_nested_pointer_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "urn:uuid:deadbeef-4321-ffff-ffff-1234feebdaed", - "$defs": { - "foo": { - "$id": "urn:uuid:deadbeef-4321-ffff-ffff-1234feebdaed", - "$defs": {"bar": {"type": "string"}}, - "$ref": "#/$defs/bar", - } - }, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a non-integer is invalid due to the $ref - ("foo", False), - # an integer is valid - (12, True), - ], - ) - @pytest.mark.xfail(reason="if not implemented") - def test_ref_to_if(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "http://example.com/ref/if", - "if": {"$id": "http://example.com/ref/if", "type": "integer"}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a non-integer is invalid due to the $ref - ("foo", False), - # an integer is valid - (12, True), - ], - ) - @pytest.mark.xfail(reason="then not implemented") - def test_ref_to_then(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "http://example.com/ref/then", - "then": {"$id": "http://example.com/ref/then", "type": "integer"}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - 
validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a non-integer is invalid due to the $ref - ("foo", False), - # an integer is valid - (12, True), - ], - ) - @pytest.mark.xfail(reason="else not implemented") - def test_ref_to_else(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "http://example.com/ref/else", - "else": {"$id": "http://example.com/ref/else", "type": "integer"}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ("foo", True), - # an integer is invalid - (12, False), - ], - ) - def test_ref_with_absolute_path_reference(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://example.com/ref/absref.json", - "$defs": { - "a": {"$id": "http://example.com/ref/absref/foobar.json", "type": "number"}, - "b": {"$id": "http://example.com/absref/foobar.json", "type": "string"}, - }, - "$ref": "/absref/foobar.json", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # number is valid - (1, True), - # non-number is invalid - ("a", False), - ], - ) - def test_id_with_file_URI_still_resolves_pointers___nix(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": 
"file:///folder/file.json", - "$defs": {"foo": {"type": "number"}}, - "$ref": "#/$defs/foo", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # number is valid - (1, True), - # non-number is invalid - ("a", False), - ], - ) - def test_id_with_file_URI_still_resolves_pointers___windows(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "file:///c:/folder/file.json", - "$defs": {"foo": {"type": "number"}}, - "$ref": "#/$defs/foo", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # number is valid - (1, True), - # non-number is invalid - ("a", False), - ], - ) - def test_empty_tokens_in_ref_json_pointer(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": {"": {"$defs": {"": {"type": "number"}}}}, - "allOf": [{"$ref": "#/$defs//$defs/"}], - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - class TestAnyOf: @pytest.mark.parametrize("target_obj", [123, True]) @pytest.mark.parametrize("temperature", [None, 0.1, 1]) diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py new file mode 100644 index 000000000..49f035283 --- 
/dev/null +++ b/tests/unit/library/json/test_refs.py @@ -0,0 +1,978 @@ +from .utils import check_match_failure, generate_and_check + +import pytest +from jsonschema import ValidationError, validate + +from json import dumps as json_dumps + + +class TestRefs: + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # match + ({"foo": False}, True), + # recursive match + ({"foo": {"foo": False}}, True), + # mismatch + ({"bar": False}, False), + # recursive mismatch + ({"foo": {"bar": False}}, False), + ], + ) + def test_root_pointer_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"foo": {"$ref": "#"}}, + "additionalProperties": False, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # match + ({"bar": 3}, True), + # mismatch + ({"bar": True}, False), + ], + ) + def test_relative_pointer_ref_to_object(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"foo": {"type": "integer"}, "bar": {"$ref": "#/properties/foo"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # match array + ([1, 2], True), + # mismatch array + ([1, "foo"], False), + ], + ) + def test_relative_pointer_ref_to_array(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "prefixItems": [{"type": "integer"}, {"$ref": 
"#/prefixItems/0"}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # slash invalid + ({"slash": "aoeu"}, False), + # tilde invalid + ({"tilde": "aoeu"}, False), + # percent invalid + ({"percent": "aoeu"}, False), + # slash valid + ({"slash": 123}, True), + # tilde valid + ({"tilde": 123}, True), + # percent valid + ({"percent": 123}, True), + ], + ) + def test_escaped_pointer_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "tilde~field": {"type": "integer"}, + "slash/field": {"type": "integer"}, + "percent%field": {"type": "integer"}, + }, + "properties": { + "tilde": {"$ref": "#/$defs/tilde~0field"}, + "slash": {"$ref": "#/$defs/slash~1field"}, + "percent": {"$ref": "#/$defs/percent%25field"}, + }, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # nested ref valid + (5, True), + # nested ref invalid + ("a", False), + ], + ) + def test_nested_refs(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "a": {"type": "integer"}, + "b": {"$ref": "#/$defs/a"}, + "c": {"$ref": "#/$defs/b"}, + }, + "$ref": "#/$defs/c", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # ref valid, maxItems valid + ({"foo": []}, True), + # ref valid, maxItems invalid + ({"foo": [1, 2, 3]}, False), + # ref invalid + ({"foo": "string"}, False), + ], + ) + def test_ref_applies_alongside_sibling_keywords(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": {"reffed": {"type": "array"}}, + "properties": {"foo": {"$ref": "#/$defs/reffed", "maxItems": 2}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # remote ref valid + ({"minLength": 1}, True), + # remote ref invalid + ({"minLength": -1}, False), + ], + ) + @pytest.mark.xfail(reason="Remote refs are not supported") + def test_remote_ref_containing_refs_itself(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "https://json-schema.org/draft/2020-12/schema", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # property named $ref valid + ({"$ref": "a"}, True), + # property named $ref invalid + ({"$ref": 2}, False), + ], + ) + def test_property_named_ref_that_is_not_a_reference(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"$ref": {"type": "string"}}, + } + if valid: + validate(instance=test_object, 
schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # property named $ref valid + ({"$ref": "a"}, True), + # property named $ref invalid + ({"$ref": 2}, False), + ], + ) + def test_property_named_ref_containing_an_actual_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"$ref": {"$ref": "#/$defs/is-string"}}, + "$defs": {"is-string": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any value is valid + ("foo", True) + ], + ) + def test_ref_to_boolean_schema_true(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/bool", + "$defs": {"bool": True}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any value is invalid + ("foo", False) + ], + ) + @pytest.mark.xfail(reason="false schema is not implemented") + def test_ref_to_boolean_schema_false(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/bool", + "$defs": {"bool": False}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, 
schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # valid tree + ( + { + "meta": "root", + "nodes": [ + { + "value": 1, + "subtree": { + "meta": "child", + "nodes": [{"value": 1.1}, {"value": 1.2}], + }, + }, + { + "value": 2, + "subtree": { + "meta": "child", + "nodes": [{"value": 2.1}, {"value": 2.2}], + }, + }, + ], + }, + True, + ), + # invalid tree + ( + { + "meta": "root", + "nodes": [ + { + "value": 1, + "subtree": { + "meta": "child", + "nodes": [{"value": "string is invalid"}, {"value": 1.2}], + }, + }, + { + "value": 2, + "subtree": { + "meta": "child", + "nodes": [{"value": 2.1}, {"value": 2.2}], + }, + }, + ], + }, + False, + ), + ], + ) + def test_Recursive_references_between_schemas(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://localhost:1234/draft2020-12/tree", + "description": "tree of nodes", + "type": "object", + "properties": { + "meta": {"type": "string"}, + "nodes": {"type": "array", "items": {"$ref": "node"}}, + }, + "required": ["meta", "nodes"], + "$defs": { + "node": { + "$id": "http://localhost:1234/draft2020-12/node", + "description": "node", + "type": "object", + "properties": {"value": {"type": "number"}, "subtree": {"$ref": "tree"}}, + "required": ["value"], + } + }, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # object with numbers is valid + ({'foo"bar': 1}, True), + # object with strings is invalid + ({'foo"bar': "1"}, False), + ], + ) + def test_refs_with_quote(self, test_object, 
valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {'foo"bar': {"$ref": "#/$defs/foo%22bar"}}, + "$defs": {'foo"bar': {"type": "number"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # referenced subschema doesn't see annotations from properties + ({"prop1": "match"}, False) + ], + ) + @pytest.mark.xfail(reason="unevaluatedProperties is not implemented") + def test_ref_creates_new_scope_when_adjacent_to_keywords(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": {"A": {"unevaluatedProperties": False}}, + "properties": {"prop1": {"type": "string"}}, + "$ref": "#/$defs/A", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # do not evaluate the $ref inside the enum, matching any string + ("this is a string", False), + # do not evaluate the $ref inside the enum, definition exact match + ({"type": "string"}, False), + # match the enum exactly + ({"$ref": "#/$defs/a_string"}, True), + ], + ) + def test_naive_replacement_of_ref_with_its_destination_is_not_correct( + self, test_object, valid + ): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": {"a_string": {"type": "string"}}, + "enum": [{"$ref": "#/$defs/a_string"}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with 
pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # invalid on inner field + ({"bar": "a", "foo": {"bar": 1}}, False), + # invalid on outer field + ({ "bar": 1, "foo": {"bar": "a"}}, False), + # valid on both fields + ({"bar": "a", "foo": {"bar": "a"}, }, True), + ], + ) + def test_refs_with_relative_uris_and_defs(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://example.com/schema-relative-uri-defs1.json", + "properties": { + "foo": { + "$id": "schema-relative-uri-defs2.json", + "$defs": {"inner": {"properties": {"bar": {"type": "string"}}}}, + "$ref": "#/$defs/inner", + } + }, + "$ref": "schema-relative-uri-defs2.json", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # invalid on inner field + ({"bar": "a", "foo": {"bar": 1}}, False), + # invalid on outer field + ({"bar": 1, "foo": {"bar": "a"}}, False), + # valid on both fields + ({"bar": "a", "foo": {"bar": "a"}}, True), + ], + ) + def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://example.com/schema-refs-absolute-uris-defs1.json", + "properties": { + "foo": { + "$id": "http://example.com/schema-refs-absolute-uris-defs2.json", + "$defs": {"inner": {"properties": {"bar": {"type": "string"}}}}, + "$ref": "#/$defs/inner", + } + }, + "$ref": "schema-refs-absolute-uris-defs2.json", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, 
schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # non-number is invalid + ("a", False), + ], + ) + def test_id_must_be_resolved_against_nearest_parent_not_just_immediate_parent( + self, test_object, valid + ): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://example.com/a.json", + "$defs": { + "x": { + "$id": "http://example.com/b/c.json", + "not": {"$defs": {"y": {"$id": "d.json", "type": "number"}}}, + } + }, + "allOf": [{"$ref": "http://example.com/b/d.json"}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # data is valid against first definition + (5, True), + # data is invalid against first definition + (50, False), + ], + ) + def test_order_of_evaluation_id_and_ref(self, test_object, valid): + schema = { + "$comment": "$id must be evaluated before $ref to get the proper $ref destination", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/draft2020-12/ref-and-id1/base.json", + "$ref": "int.json", + "$defs": { + "bigint": { + "$comment": "canonical uri: https://example.com/ref-and-id1/int.json", + "$id": "int.json", + "maximum": 10, + }, + "smallint": { + "$comment": "canonical uri: https://example.com/ref-and-id1-int.json", + "$id": "/draft2020-12/ref-and-id1-int.json", + "maximum": 2, + }, + }, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + 
validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # data is valid against first definition + (5, True), + # data is invalid against first definition + (50, False), + ], + ) + def test_order_of_evaluation_id_and_anchor_and_ref(self, test_object, valid): + schema = { + "$comment": "$id must be evaluated before $ref to get the proper $ref destination", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/draft2020-12/ref-and-id2/base.json", + "$ref": "#bigint", + "$defs": { + "bigint": { + "$comment": "canonical uri: /ref-and-id2/base.json#/$defs/bigint; another valid uri for this location: /ref-and-id2/base.json#bigint", + "$anchor": "bigint", + "maximum": 10, + }, + "smallint": { + "$comment": "canonical uri: https://example.com/ref-and-id2#/$defs/smallint; another valid uri for this location: https://example.com/ref-and-id2/#bigint", + "$id": "https://example.com/draft2020-12/ref-and-id2/", + "$anchor": "bigint", + "maximum": 2, + }, + }, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # valid under the URN IDed schema + ({"foo": 37}, True), + # invalid under the URN IDed schema + ({"foo": 12}, False), + ], + ) + def test_simple_URN_base_URI_with_ref_via_the_URN(self, test_object, valid): + schema = { + "$comment": "URIs do not have to have HTTP(s) schemes", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:uuid:deadbeef-1234-ffff-ffff-4321feebdaed", + "minimum": 30, + "properties": {"foo": {"$ref": "urn:uuid:deadbeef-1234-ffff-ffff-4321feebdaed"}}, + } + if valid: + 
validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def test_simple_URN_base_URI_with_JSON_pointer(self, test_object, valid): + schema = { + "$comment": "URIs do not have to have HTTP(s) schemes", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:uuid:deadbeef-1234-00ff-ff00-4321feebdaed", + "properties": {"foo": {"$ref": "#/$defs/bar"}}, + "$defs": {"bar": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def test_URN_base_URI_with_NSS(self, test_object, valid): + schema = { + "$comment": "RFC 8141 §2.2", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:example:1/406/47452/2", + "properties": {"foo": {"$ref": "#/$defs/bar"}}, + "$defs": {"bar": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def 
test_URN_base_URI_with_r_component(self, test_object, valid): + schema = { + "$comment": "RFC 8141 §2.3.1", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:example:foo-bar-baz-qux?+CCResolve:cc=uk", + "properties": {"foo": {"$ref": "#/$defs/bar"}}, + "$defs": {"bar": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def test_URN_base_URI_with_q_component(self, test_object, valid): + schema = { + "$comment": "RFC 8141 §2.3.2", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:example:weather?=op=map&lat=39.56&lon=-104.85&datetime=1969-07-21T02:56:15Z", + "properties": {"foo": {"$ref": "#/$defs/bar"}}, + "$defs": {"bar": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def test_URN_base_URI_with_URN_and_JSON_pointer_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:uuid:deadbeef-1234-0000-0000-4321feebdaed", + "properties": { + "foo": {"$ref": "urn:uuid:deadbeef-1234-0000-0000-4321feebdaed#/$defs/bar"} + }, + "$defs": {"bar": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + 
generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def test_URN_base_URI_with_URN_and_anchor_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:uuid:deadbeef-1234-ff00-00ff-4321feebdaed", + "properties": { + "foo": {"$ref": "urn:uuid:deadbeef-1234-ff00-00ff-4321feebdaed#something"} + }, + "$defs": {"bar": {"$anchor": "something", "type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ("bar", True), + # a non-string is invalid + (12, False), + ], + ) + def test_URN_ref_with_nested_pointer_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "urn:uuid:deadbeef-4321-ffff-ffff-1234feebdaed", + "$defs": { + "foo": { + "$id": "urn:uuid:deadbeef-4321-ffff-ffff-1234feebdaed", + "$defs": {"bar": {"type": "string"}}, + "$ref": "#/$defs/bar", + } + }, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a non-integer is invalid due to the $ref + ("foo", False), + # an integer is valid + (12, True), + ], + ) + 
@pytest.mark.xfail(reason="if not implemented") + def test_ref_to_if(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "http://example.com/ref/if", + "if": {"$id": "http://example.com/ref/if", "type": "integer"}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a non-integer is invalid due to the $ref + ("foo", False), + # an integer is valid + (12, True), + ], + ) + @pytest.mark.xfail(reason="then not implemented") + def test_ref_to_then(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "http://example.com/ref/then", + "then": {"$id": "http://example.com/ref/then", "type": "integer"}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a non-integer is invalid due to the $ref + ("foo", False), + # an integer is valid + (12, True), + ], + ) + @pytest.mark.xfail(reason="else not implemented") + def test_ref_to_else(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "http://example.com/ref/else", + "else": {"$id": "http://example.com/ref/else", "type": "integer"}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ("foo", True), + # an integer is invalid + (12, False), + ], + ) + def test_ref_with_absolute_path_reference(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://example.com/ref/absref.json", + "$defs": { + "a": {"$id": "http://example.com/ref/absref/foobar.json", "type": "number"}, + "b": {"$id": "http://example.com/absref/foobar.json", "type": "string"}, + }, + "$ref": "/absref/foobar.json", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # non-number is invalid + ("a", False), + ], + ) + def test_id_with_file_URI_still_resolves_pointers___nix(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "file:///folder/file.json", + "$defs": {"foo": {"type": "number"}}, + "$ref": "#/$defs/foo", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # non-number is invalid + ("a", False), + ], + ) + def test_id_with_file_URI_still_resolves_pointers___windows(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "file:///c:/folder/file.json", + "$defs": {"foo": {"type": "number"}}, + "$ref": "#/$defs/foo", 
+ } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # non-number is invalid + ("a", False), + ], + ) + def test_empty_tokens_in_ref_json_pointer(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": {"": {"$defs": {"": {"type": "number"}}}}, + "allOf": [{"$ref": "#/$defs//$defs/"}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) \ No newline at end of file diff --git a/tests/unit/library/test_json_stringformat.py b/tests/unit/library/json/test_string_format.py similarity index 99% rename from tests/unit/library/test_json_stringformat.py rename to tests/unit/library/json/test_string_format.py index b484ccca2..3259274bb 100644 --- a/tests/unit/library/test_json_stringformat.py +++ b/tests/unit/library/json/test_string_format.py @@ -2,7 +2,9 @@ import pytest import json -from .test_json import generate_and_check, check_match_failure + +from .utils import generate_and_check +from .utils import check_match_failure class TestDate: diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py new file mode 100644 index 000000000..920571d27 --- /dev/null +++ b/tests/unit/library/json/utils.py @@ -0,0 +1,58 @@ +from typing import Union, Optional, Any, Set +from guidance import json as gen_json +from guidance.library._json import JSONSchema + +from ....utils import check_match_failure as _check_match_failure, check_run_with_temperature, generate_and_check 
as _generate_and_check + +from jsonschema import validate + + +import json +from functools import partial +from json import dumps as json_dumps, loads as json_loads + + +def generate_and_check( + target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None +): + if isinstance(schema_obj, str): + schema_obj = json_loads(schema_obj) + + # Sanity check what we're being asked + validate(instance=target_obj, schema=schema_obj) + prepared_json = json_dumps(target_obj) + assert json.loads(prepared_json) == target_obj + + # Now test that the grammar can recognize and generate prepared_json + # We partial in the grammar_callable + if desired_temperature is not None: + grammar_callable = partial( + gen_json, schema=schema_obj, temperature=desired_temperature + ) + else: + grammar_callable = partial(gen_json, schema=schema_obj) + + lm = _generate_and_check( + grammar_callable, + test_string=prepared_json, + ) + check_run_with_temperature(lm, desired_temperature) + + +def check_match_failure( + *, + bad_string: str, + good_bytes: Optional[bytes] = None, + failure_byte: Optional[bytes] = None, + allowed_bytes: Optional[Set[bytes]] = None, + schema_obj: Union[str, JSONSchema], +): + grammar = gen_json(schema=schema_obj) + + _check_match_failure( + bad_string=bad_string, + good_bytes=good_bytes, + failure_byte=failure_byte, + allowed_bytes=allowed_bytes, + grammar=grammar, + ) \ No newline at end of file From 496718ed7e8a0cc62744401d174831e03ac352f1 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 09:46:27 -0700 Subject: [PATCH 37/70] blacken json tests --- tests/unit/library/json/test_allOf.py | 61 +++-- tests/unit/library/json/test_json.py | 259 ++++++++---------- tests/unit/library/json/test_refs.py | 16 +- tests/unit/library/json/test_string_format.py | 140 +++++++--- tests/unit/library/json/utils.py | 28 +- 5 files changed, 291 insertions(+), 213 deletions(-) diff --git a/tests/unit/library/json/test_allOf.py 
b/tests/unit/library/json/test_allOf.py index 74aa539b6..1a388d2ce 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -6,8 +6,8 @@ from jsonschema import ValidationError, validate from guidance import json as gen_json -from .utils import generate_and_check -from .utils import check_match_failure + +from .utils import check_match_failure, generate_and_check class TestAllOf: @@ -139,7 +139,6 @@ def test_allOf_simple_maximum(self, test_object, valid): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - @pytest.mark.parametrize( ["test_object", "valid"], [ @@ -358,15 +357,18 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): ({"foo": 0, "bar": 5, "baz": 4}, False), # invalid: baz is not an integer or null ({"foo": 0, "bar": 5, "baz": "quxx"}, False), - ] + ], ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "properties": {"foo": {"type": ["integer", "null"], "maximum": 4}, "bar": {"minimum": 5, "maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"], "minimum": 5} + "properties": { + "foo": {"type": ["integer", "null"], "maximum": 4}, + "bar": {"minimum": 5, "maximum": 5}, + }, + "additionalProperties": {"type": ["integer", "null"], "minimum": 5}, }, # additionalProperties in parent schema { @@ -374,16 +376,22 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}} ], "properties": {"bar": {"maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"]} + "additionalProperties": {"type": ["integer", "null"]}, }, # additionalProperties in allOf { "allOf": [ - {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}}, - {"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}} + { + "properties": {"foo": {"maximum": 4}}, + 
"additionalProperties": {"minimum": 5}, + }, + { + "properties": {"bar": {"maximum": 5}}, + "additionalProperties": {"type": ["integer", "null"]}, + }, ] }, - ] + ], ) def test_additionalProperties_in_allOf(self, schema, test_object, valid): if valid: @@ -397,19 +405,19 @@ def test_additionalProperties_in_allOf(self, schema, test_object, valid): @pytest.mark.parametrize( "test_object, valid", [ - ({}, True), # empty object is valid - ({"foo": 1}, False), # foo is not a string - ({"foo": "x"}, False), # foo is not an integer - ({"foo": True}, False), # foo is not a string or an integer - ] + ({}, True), # empty object is valid + ({"foo": 1}, False), # foo is not a string + ({"foo": "x"}, False), # foo is not an integer + ({"foo": True}, False), # foo is not a string or an integer + ], ) def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): schema = { "type": "object", "allOf": [ {"additionalProperties": {"type": "integer"}}, - {"additionalProperties": {"type": "string"}} - ] + {"additionalProperties": {"type": "string"}}, + ], } try: if valid: @@ -448,15 +456,18 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): ([0, 5, 4], False), # invalid: baz is not an integer or null ([0, 5, "quxx"], False), - ] + ], ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "prefixItems": [{"type": ["integer", "null"], "maximum": 4}, {"minimum": 5, "maximum": 5}], - "items": {"type": ["integer", "null"], "minimum": 5} + "prefixItems": [ + {"type": ["integer", "null"], "maximum": 4}, + {"minimum": 5, "maximum": 5}, + ], + "items": {"type": ["integer", "null"], "minimum": 5}, }, # items in parent schema { @@ -464,17 +475,19 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, ], "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], - "items": {"type": ["integer", "null"]} - + "items": {"type": 
["integer", "null"]}, }, # items in allOf { "allOf": [ {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, - {"prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], "items": {"type": ["integer", "null"]}} + { + "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], + "items": {"type": ["integer", "null"]}, + }, ] }, - ] + ], ) def test_items_and_prefixitems_in_allOf(self, schema, test_object, valid): if valid: diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index d94a98b69..f3602fddd 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1,21 +1,21 @@ import json +from json import dumps as json_dumps import pytest -from jsonschema import validate, ValidationError -from json import dumps as json_dumps +from jsonschema import ValidationError, validate from guidance import json as gen_json from guidance import models - from guidance.library._json import IGNORED_KEYS -from .utils import check_match_failure, generate_and_check +from .utils import check_match_failure, generate_and_check # Common sets of allowed_bytes INTEGER_LEADING = {b"-", b"0", *{bytes([i]) for i in range(ord("1"), ord("9") + 1)}} INTEGER_FOLLOWING = {bytes([i]) for i in range(ord("0"), ord("9") + 1)} A_to_Z = {bytes([i]) for i in range(ord("A"), ord("Z") + 1)} + def test_null(): schema = """{"type": "null" }""" @@ -130,6 +130,7 @@ def test_bad_number(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) + class TestBoundedNumeric: @pytest.mark.parametrize( "instance, schema, should_pass", @@ -139,11 +140,15 @@ class TestBoundedNumeric: (-5, {"type": "integer", "minimum": -5}, True), pytest.param( *(5.0, {"type": "integer", "minimum": 5}, True), - marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") + marks=pytest.mark.xfail( + reason="JSON technically allows trailing zeroes, but we currently don't" + ), ), 
pytest.param( *(-5.0, {"type": "integer", "minimum": -5}, True), - marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") + marks=pytest.mark.xfail( + reason="JSON technically allows trailing zeroes, but we currently don't" + ), ), (5.1, {"type": "integer", "minimum": 5}, False), (-5.1, {"type": "integer", "minimum": -5}, False), @@ -203,7 +208,11 @@ class TestBoundedNumeric: (5.1, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-9.9, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), (5.0, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, False), - (-10.0, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, False), + ( + -10.0, + {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, + False, + ), (9.9, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-5.1, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), # --- Edge cases --- @@ -244,10 +253,10 @@ class TestBoundedNumeric: (0.2999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, True), (-0.2999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, True), (0.0999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), - (-0.0999, {"type": "number", "minimum": -.3, "maximum": -0.1}, False), + (-0.0999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), (0.3001, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), (-0.3001, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), - ] + ], ) def test_numeric_validation(self, instance, schema, should_pass): # Sanity check @@ -257,10 +266,7 @@ def test_numeric_validation(self, instance, schema, should_pass): else: with pytest.raises(ValidationError): validate(instance, schema=schema) - check_match_failure( - bad_string=json_dumps(instance), - schema_obj=schema - ) + check_match_failure(bad_string=json_dumps(instance), 
schema_obj=schema) class TestString: @@ -341,9 +347,7 @@ def test_regex_bad(self, bad_string: str, good_bytes, failure_byte, allowed_byte schema_obj=schema_obj, ) - @pytest.mark.parametrize( - "string", ["aA\u001f", '"""'] - ) + @pytest.mark.parametrize("string", ["aA\u001f", '"""']) def test_regex_properly_escaped_good(self, string): schema_obj = {"type": "string", "pattern": r".{3}"} # First sanity check what we're setting up @@ -356,13 +360,15 @@ def test_regex_properly_escaped_good(self, string): [ ( '"\\u001f\\u001f\u001f', - b'"\\u001f\\u001f', # able to match the first two stringified bytes - '\u001f'.encode(), # fails on a literal \x1f byte - None # hard to write a set of allowed bytes here + b'"\\u001f\\u001f', # able to match the first two stringified bytes + "\u001f".encode(), # fails on a literal \x1f byte + None, # hard to write a set of allowed bytes here ), ], ) - def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_byte, allowed_bytes): + def test_regex_properly_escaped_bad( + self, bad_string: str, good_bytes, failure_byte, allowed_bytes + ): # Note that the strings being fed in include the double quotes required # to make them JSON strings schema_obj = {"type": "string", "pattern": r".{3}"} @@ -374,7 +380,6 @@ def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_b schema_obj=schema_obj, ) - @pytest.mark.parametrize( "my_string", ["a", "bb", "ccc", "150", ",?", ".\t\n", "(){", "aA7", "\\9O"] ) @@ -673,28 +678,37 @@ def test_required_is_required(self): generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": "xyz"}, schema) check_match_failure( - bad_string=json_dumps( - {"a": 1} - ), + bad_string=json_dumps({"a": 1}), schema_obj=schema, ) def test_validated_against_additionalProperties(self): - schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b"], "additionalProperties": {"type": "integer"}} + schema = { + "type": "object", + "properties": {"a": 
{"type": "integer"}}, + "required": ["b"], + "additionalProperties": {"type": "integer"}, + } generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": 42}, schema) check_match_failure( - bad_string=json_dumps( - {"a": 1, "b": "string"} - ), + bad_string=json_dumps({"a": 1, "b": "string"}), schema_obj=schema, ) def test_false_additionalProperties_fails(self): - schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b", "c"], "additionalProperties": False} + schema = { + "type": "object", + "properties": {"a": {"type": "integer"}}, + "required": ["b", "c"], + "additionalProperties": False, + } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "Required properties not in properties but additionalProperties is False. Missing required properties: ['b', 'c']" + assert ( + ve.value.args[0] + == "Required properties not in properties but additionalProperties is False. Missing required properties: ['b', 'c']" + ) class TestSimpleArray: @@ -760,7 +774,6 @@ def test_object_list(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( ["bad_string", "good_bytes", "failure_byte", "allowed_bytes"], [ @@ -870,7 +883,6 @@ def test_good_with_items(self, min_items, max_items, target_obj): } generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -951,7 +963,6 @@ def test_bad_with_prefix_and_items( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1016,7 +1027,6 @@ def test_bad_with_prefix( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1214,13 +1224,12 @@ def test_allOf_ref(self): generate_and_check(target_obj, schema_obj) def 
test_allOf_bad_schema(self): - schema = { - "allOf" : [{ "type": "integer" }, { "type": "string" }] - } + schema = {"allOf": [{"type": "integer"}, {"type": "string"}]} with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ve.value.args[0] == "allOf with conflicting types" + class TestOneOf: @pytest.mark.parametrize("target_obj", [123, 42]) def test_oneOf_simple(self, target_obj): @@ -1235,7 +1244,6 @@ def test_oneOf_simple(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize("target_obj", [123, True]) def test_oneOf_compound(self, target_obj): schema = """{ @@ -1273,7 +1281,6 @@ def test_enum(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1293,7 +1300,6 @@ def test_bad_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1321,13 +1327,10 @@ def test_bad_prefix_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes) ("2", False), ("1", False), (True, False), - ] + ], ) def test_typed_enum_single_type(self, obj, valid): - schema_obj = { - "enum": [1, "2", True], - "type": "integer" - } + schema_obj = {"enum": [1, "2", True], "type": "integer"} if valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1344,13 +1347,10 @@ def test_typed_enum_single_type(self, obj, valid): ("2", True), ("1", False), (True, False), - ] + ], ) def test_typed_enum_multiple_types(self, obj, valid): - schema_obj = { - "enum": [1, "2", True], - "type": ["integer", "string"] - } + schema_obj = {"enum": [1, "2", True], "type": ["integer", "string"]} if valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1360,14 +1360,12 @@ def test_typed_enum_multiple_types(self, 
obj, valid): check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj) def test_invalid_typed_enum(self): - schema_obj = { - "enum": [1, "2"], - "type": "boolean" - } + schema_obj = {"enum": [1, "2"], "type": "boolean"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "No valid options found for enum with type 'boolean': [1, '2']" + class TestConst: def test_constant_int(self): # First sanity check what we're setting up @@ -1427,45 +1425,29 @@ def test_constant_precedence(self): ) def test_valid_typed_const(self): - schema_obj = { - "const": 1, - "type": "integer" - } + schema_obj = {"const": 1, "type": "integer"} target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_typed_const(self): - schema_obj = { - "const": 1, - "type": "boolean" - } + schema_obj = {"const": 1, "type": "boolean"} with pytest.raises(ValidationError): gen_json(schema=schema_obj) def test_valid_enum_const(self): - schema_obj = { - "const": 1, - "enum": [1, 2, 3] - } + schema_obj = {"const": 1, "enum": [1, 2, 3]} target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_enum_const(self): - schema_obj = { - "const": 1, - "enum": [2, 3] - } + schema_obj = {"const": 1, "enum": [2, 3]} with pytest.raises(ValidationError): gen_json(schema=schema_obj) def test_valid_typed_enum_const(self): - schema_obj = { - "const": 1, - "enum": [1, "2", 3], - "type": "integer" - } + schema_obj = {"const": 1, "enum": [1, "2", 3], "type": "integer"} target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) @@ -1473,17 +1455,13 @@ def test_valid_typed_enum_const(self): @pytest.mark.parametrize( "const", [ - "2", # right enum, wrong type - 2, # wrong enum, right type - "3", # wrong enum, wrong type - ] + "2", # right enum, wrong type + 2, # wrong enum, right type + "3", # wrong enum, 
wrong type + ], ) def test_invalid_typed_enum_const(self, const): - schema_obj = { - "const": const, - "enum": [1, "2", 3], - "type": "integer" - } + schema_obj = {"const": const, "enum": [1, "2", 3], "type": "integer"} with pytest.raises(ValidationError): gen_json(schema=schema_obj) @@ -1531,11 +1509,15 @@ def test_simple_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ - ({"a": "1"}, b'{"a": ', b'"', INTEGER_LEADING, ), + ( + {"a": "1"}, + b'{"a": ', + b'"', + INTEGER_LEADING, + ), ( {"a": 1, "b": 1.5}, b'{"a": 1, "b": 1', @@ -1555,9 +1537,7 @@ def test_simple_bad_type(self, bad_obj, good_bytes, failure_byte, allowed_bytes) schema_obj=schema_obj, ) - @pytest.mark.parametrize( - "target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}] - ) + @pytest.mark.parametrize("target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}]) def test_anyOf_additional_properties(self, target_obj): # First sanity check what we're setting up schema_obj = json.loads(self.anyOf_schema) @@ -1566,7 +1546,6 @@ def test_anyOf_additional_properties(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1608,7 +1587,6 @@ def test_properties_and_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1617,9 +1595,7 @@ def test_properties_and_additional_properties(self, target_obj, temperature): ({"a": 1, "b": 2}, b'{"', b"a", {b"m"}), ], ) - def test_combined_missing_properties( - self, bad_obj, good_bytes, failure_byte, allowed_bytes - ): + def test_combined_missing_properties(self, bad_obj, good_bytes, failure_byte, 
allowed_bytes): schema_obj = json.loads(self.combined_schema) bad_string = json_dumps(bad_obj) check_match_failure( @@ -1630,7 +1606,6 @@ def test_combined_missing_properties( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1759,7 +1734,6 @@ def test_empty_schema(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_string, good_bytes, failure_byte, allowed_bytes", [ @@ -1788,9 +1762,7 @@ def test_empty_schema(self, target_obj, temperature): ), ], ) - def test_bad_empty_schema( - self, bad_string, good_bytes, failure_byte, allowed_bytes - ): + def test_bad_empty_schema(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj = json.loads(self.empty_schema) check_match_failure( bad_string=bad_string, @@ -1806,7 +1778,12 @@ def test_bad_empty_schema( # Empty property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, + { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + "$defs": {"A": {}}, + "required": ["a"], + }, ], ) @pytest.mark.parametrize( @@ -1837,10 +1814,14 @@ def test_nested_empty_schema(self, schema_obj, target_obj, temperature): # Empty property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, + { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + "$defs": {"A": {}}, + "required": ["a"], + }, ], ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1883,7 +1864,6 @@ def test_nested_empty_schema_with_props(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - 
@pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1918,7 +1898,6 @@ def test_items(self, schema_obj): [1, 0.4, "hello", False, None, {"a": 42}, [1, 2, 3, "four"]], schema_obj ) - def test_no_items(self): schema_obj = {"type": "array", "items": False} check_match_failure( @@ -1951,7 +1930,6 @@ def test_additionalProperties(self, schema_obj): schema_obj, ) - def test_no_additionalProperties(self): schema_obj = {"type": "object", "additionalProperties": False} check_match_failure( @@ -1962,17 +1940,17 @@ def test_no_additionalProperties(self): schema_obj=schema_obj, ) + def test_ignored_keys_allowed_as_properties(): schema_obj = { "type": "object", - "properties": { - key: {"type": "string"} for key in IGNORED_KEYS - }, + "properties": {key: {"type": "string"} for key in IGNORED_KEYS}, "required": list(IGNORED_KEYS), } target_obj = {key: "value" for key in IGNORED_KEYS} generate_and_check(target_obj, schema_obj) + class TestRequiredProperties: schema_obj = { "type": "object", @@ -1981,10 +1959,19 @@ class TestRequiredProperties: "b": {"type": "number"}, "c": {"type": "boolean"}, }, - "additionalProperties": True + "additionalProperties": True, } ALL_REQUIRED = ["a", "b", "c"] - SOME_REQUIRED_SUBSETS = [[], ["a"], ["b"], ["c"], ["a", "b"], ["a", "c"], ["b", "c"], ["a", "b", "c"]] + SOME_REQUIRED_SUBSETS = [ + [], + ["a"], + ["b"], + ["c"], + ["a", "b"], + ["a", "c"], + ["b", "c"], + ["a", "b", "c"], + ] NONE_REQUIRED: list[str] = [] @pytest.mark.parametrize( @@ -1993,7 +1980,7 @@ class TestRequiredProperties: {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) def test_all_required_good(self, extra_items): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2013,7 +2000,7 @@ def test_all_required_good(self, extra_items): ({"c": True}), # Missing all ({}), - ] + ], ) def test_all_required_bad(self, bad_obj): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2028,7 +2015,7 @@ def 
test_all_required_bad(self, bad_obj): {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) @pytest.mark.parametrize( "required", @@ -2066,7 +2053,7 @@ def test_some_required_bad(self, required): {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) @pytest.mark.parametrize( "target_obj", @@ -2079,55 +2066,48 @@ def test_some_required_bad(self, required): {"a": "hello", "c": True}, {"b": 42, "c": True}, {"a": "hello", "b": 42, "c": True}, - ] + ], ) def test_none_required(self, target_obj, extra_items): schema_obj = {**self.schema_obj, "required": self.NONE_REQUIRED} generate_and_check({**target_obj, **extra_items}, schema_obj) + class TestRequiredPropertiesScaling: - @pytest.mark.parametrize( - "num_properties", - [1, 2, 3, 4, 5, 10, 20, 50, 100] - ) + @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) def test_many_optional_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": { - f"prop_{i}": {"type": "string"} for i in range(num_properties) - }, - "required": [] # Empty should be worst-case scenario + "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, + "required": [], # Empty should be worst-case scenario } from guidance.library._json import GenJson + genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() cache_info = genjson._join.__wrapped__.cache_info() # Theoretical number of cache misses under the current implementation - expected_misses = 2*num_properties - 1 - MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? + expected_misses = 2 * num_properties - 1 + MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? 
assert 0 < cache_info.misses <= expected_misses + MISSES_MAGIC_NUMBER # NOTE: that if the cache maxsize is hit, the number of misses will be more than expected # Theoretical number of total calls under the current implementation - expected_calls = num_properties*(num_properties - 1) // 2 - CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? + expected_calls = num_properties * (num_properties - 1) // 2 + CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? assert 0 < cache_info.hits + cache_info.misses <= expected_calls + CALLS_MAGIC_NUMBER - @pytest.mark.parametrize( - "num_properties", - [1, 2, 3, 4, 5, 10, 20, 50, 100] - ) + @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) def test_all_required_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": { - f"prop_{i}": {"type": "string"} for i in range(num_properties) - }, - "required": [f"prop_{i}" for i in range(num_properties)] + "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, + "required": [f"prop_{i}" for i in range(num_properties)], } from guidance.library._json import GenJson + genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() @@ -2155,7 +2135,7 @@ class TestBooleanSchema: {"a": [1, 2, 3]}, {"a": {"b": 1}}, False, - True + True, ], ) def test_true_schema(self, target_obj): @@ -2168,13 +2148,14 @@ def test_true_schema(self, target_obj): [ False, {"type": "object", "properties": {"a": False}, "required": ["a"]}, - ] + ], ) def test_false_schema(self, schema_obj): with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "No valid JSON can be generated from a schema of `False`" + class TestWhitespace: seps = [ (", ", ": "), @@ -2192,7 +2173,7 @@ class TestWhitespace: ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": 
{"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ] + ], ) @pytest.mark.parametrize( "separators", @@ -2218,7 +2199,7 @@ def test_separators(self, separators, schema, obj): ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ] + ], ) @pytest.mark.parametrize( "separators", diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py index 49f035283..f2248129d 100644 --- a/tests/unit/library/json/test_refs.py +++ b/tests/unit/library/json/test_refs.py @@ -1,9 +1,9 @@ -from .utils import check_match_failure, generate_and_check +from json import dumps as json_dumps import pytest from jsonschema import ValidationError, validate -from json import dumps as json_dumps +from .utils import check_match_failure, generate_and_check class TestRefs: @@ -439,9 +439,15 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( # invalid on inner field ({"bar": "a", "foo": {"bar": 1}}, False), # invalid on outer field - ({ "bar": 1, "foo": {"bar": "a"}}, False), + ({"bar": 1, "foo": {"bar": "a"}}, False), # valid on both fields - ({"bar": "a", "foo": {"bar": "a"}, }, True), + ( + { + "bar": "a", + "foo": {"bar": "a"}, + }, + True, + ), ], ) def test_refs_with_relative_uris_and_defs(self, test_object, valid): @@ -975,4 +981,4 @@ def test_empty_tokens_in_ref_json_pointer(self, test_object, valid): else: with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) \ No newline at end of file + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) diff --git a/tests/unit/library/json/test_string_format.py b/tests/unit/library/json/test_string_format.py index 3259274bb..7b2dd9bdc 100644 --- a/tests/unit/library/json/test_string_format.py +++ 
b/tests/unit/library/json/test_string_format.py @@ -1,10 +1,10 @@ """Adapted from https://github.com/json-schema-org/JSON-Schema-Test-Suite/tree/9fc880bfb6d8ccd093bc82431f17d13681ffae8e/tests/draft2020-12/optional/format""" -import pytest import json -from .utils import generate_and_check -from .utils import check_match_failure +import pytest + +from .utils import check_match_failure, generate_and_check class TestDate: @@ -45,17 +45,35 @@ def test_good(self, target_str): "bad_str", [ '"2020-01-32"', # a invalid date string with 32 days in January - pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 29 days in February (normal) - pytest.param('"2020-02-30"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 30 days in February (leap) + pytest.param( + '"2021-02-29"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 29 days in February (normal) + pytest.param( + '"2020-02-30"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 30 days in February (leap) '"2020-03-32"', # a invalid date string with 32 days in March - pytest.param('"2020-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in April + pytest.param( + '"2020-04-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in April '"2020-05-32"', # a invalid date string with 32 days in May - pytest.param('"2020-06-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in June + pytest.param( + '"2020-06-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in June '"2020-07-32"', # a invalid date string with 
32 days in July '"2020-08-32"', # a invalid date string with 32 days in August - pytest.param('"2020-09-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in September + pytest.param( + '"2020-09-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in September '"2020-10-32"', # a invalid date string with 32 days in October - pytest.param('"2020-11-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in November + pytest.param( + '"2020-11-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in November '"2020-12-32"', # a invalid date string with 32 days in December '"2020-13-01"', # a invalid date string with invalid month '"06/19/1963"', # an invalid date string @@ -63,8 +81,13 @@ def test_good(self, target_str): '"1998-1-20"', # non-padded month dates are not valid '"1998-01-1"', # non-padded day dates are not valid '"1998-13-01"', # invalid month - pytest.param('"1998-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # invalid month-day combination - pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard")), # 2021 is not a leap year + pytest.param( + '"1998-04-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # invalid month-day combination + pytest.param( + '"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard") + ), # 2021 is not a leap year '"1963-06-1\\u09ea"', # invalid non-ASCII '৪' (a Bengali 4) '"20230328"', # ISO8601 / non-RFC3339: YYYYMMDD without dashes (2023-03-28) '"2023-W01"', # ISO8601 / non-RFC3339: week number implicit day of week (2023-01-02) @@ -138,6 +161,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, 
schema_obj=schema_obj) + @pytest.mark.xfail(reason="idn-hostname format not implemented") class TestIdnHostname: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"idn-hostname"}' @@ -301,6 +325,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) + @pytest.mark.xfail(reason="iri-reference format is not yet implemented") class TestIriReference: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"iri-reference"}' @@ -490,20 +515,40 @@ def test_good(self, target_str): '"008:030:006Z"', # invalid time string with extra leading zeros '"8:3:6Z"', # invalid time string with no leading zero for single digit '"8:0030:6Z"', # hour, minute, second must be two digits - pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong hour) - pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong minute) - pytest.param('"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong hour) - pytest.param('"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong minute) - pytest.param('"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong hour) - pytest.param('"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong minute) - pytest.param('"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong hour) - pytest.param('"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong minute) + pytest.param( + '"22:59:60Z"', 
marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, Zulu (wrong hour) + pytest.param( + '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, Zulu (wrong minute) + pytest.param( + '"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, zero time-offset (wrong hour) + pytest.param( + '"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, zero time-offset (wrong minute) + pytest.param( + '"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, positive time-offset (wrong hour) + pytest.param( + '"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, positive time-offset (wrong minute) + pytest.param( + '"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, negative time-offset (wrong hour) + pytest.param( + '"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, negative time-offset (wrong minute) '"08:30:06-8:000"', # hour, minute in time-offset must be two digits '"24:00:00Z"', # an invalid time string with invalid hour '"00:60:00Z"', # an invalid time string with invalid minute '"00:00:61Z"', # an invalid time string with invalid second - pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong hour) - pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong minute) + pytest.param( + '"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid time string with invalid leap second (wrong hour) + pytest.param( + '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid time string 
with invalid leap second (wrong minute) '"01:02:03+24:00"', # an invalid time string with invalid time numoffset hour '"01:02:03+00:60"', # an invalid time string with invalid time numoffset minute '"01:02:03Z+00:30"', # an invalid time string with invalid time with both Z and numoffset @@ -539,11 +584,23 @@ class TestIpv6: '"::42:ff:1"', # leading colons is valid '"d6::"', # trailing colons is valid '"1:d6::42"', # single set of double colons in the middle is valid - pytest.param('"1::d6:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with the ipv4 section as decimal octets - pytest.param('"1:2::192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with double colons between the sections - pytest.param('"::ffff:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with leading double colons (ipv4-mapped ipv6 address) + pytest.param( + '"1::d6:192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with the ipv4 section as decimal octets + pytest.param( + '"1:2::192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with double colons between the sections + pytest.param( + '"::ffff:192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with leading double colons (ipv4-mapped ipv6 address) '"1:2:3:4:5:6:7:8"', # 8 octets - pytest.param('"1000:1000:1000:1000:1000:1000:255.255.255.255"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # a long valid ipv6 + pytest.param( + '"1000:1000:1000:1000:1000:1000:255.255.255.255"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # a long valid ipv6 ], ) def test_good(self, target_str): @@ -711,11 +768,22 @@ class TestEmail: '"te~st@example.com"', # tilde in local part is valid 
'"~test@example.com"', # tilde before local part is valid '"test~@example.com"', # tilde after local part is valid - pytest.param('"\\"joe bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a space in the local part is valid - pytest.param('"\\"joe..bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a double dot in the local part is valid - pytest.param('"\\"joe@bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a @ in the local part is valid + pytest.param( + '"\\"joe bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a space in the local part is valid + pytest.param( + '"\\"joe..bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a double dot in the local part is valid + pytest.param( + '"\\"joe@bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a @ in the local part is valid '"joe.bloggs@[127.0.0.1]"', # an IPv4-address-literal after the @ is valid - pytest.param('"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard")), # an IPv6-address-literal after the @ is valid + pytest.param( + '"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard") + ), # an IPv6-address-literal after the @ is valid '"te.s.t@example.com"', # two separated dots inside local part are valid '"riedgar+guidance@example.com"', # plus sign in local part is valid ], @@ -861,9 +929,16 @@ def test_good(self, target_str): "bad_str", [ '"1998-12-31T23:59:61Z"', # an invalid date-time past leap second, UTC - pytest.param('"1998-12-31T23:58:60Z"', 
marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong minute, UTC - pytest.param('"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong hour, UTC - pytest.param('"1990-02-31T15:59:59.123-08:00"', marks=pytest.mark.xfail(reason="valid days not yet tied to month")), # an invalid day in date-time string + pytest.param( + '"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid date-time with leap second on a wrong minute, UTC + pytest.param( + '"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid date-time with leap second on a wrong hour, UTC + pytest.param( + '"1990-02-31T15:59:59.123-08:00"', + marks=pytest.mark.xfail(reason="valid days not yet tied to month"), + ), # an invalid day in date-time string '"1990-12-31T15:59:59-24:00"', # an invalid offset in date-time string '"1963-06-19T08:30:06.28123+01:00Z"', # an invalid closing Z after time-zone offset '"06/19/1963 08:30:06 PST"', # an invalid date-time string @@ -878,6 +953,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) + @pytest.mark.xfail(reason="regex format not implemented") class TestRegex: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"regex"}' diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py index 920571d27..ffbbe3b5f 100644 --- a/tests/unit/library/json/utils.py +++ b/tests/unit/library/json/utils.py @@ -1,19 +1,23 @@ -from typing import Union, Optional, Any, Set -from guidance import json as gen_json -from guidance.library._json import JSONSchema - -from ....utils import check_match_failure as _check_match_failure, check_run_with_temperature, generate_and_check as _generate_and_check +import json +from functools import partial +from 
json import dumps as json_dumps +from json import loads as json_loads +from typing import Any, Optional, Set, Union from jsonschema import validate +from guidance import json as gen_json +from guidance.library._json import JSONSchema -import json -from functools import partial -from json import dumps as json_dumps, loads as json_loads +from ....utils import check_match_failure as _check_match_failure +from ....utils import check_run_with_temperature +from ....utils import generate_and_check as _generate_and_check def generate_and_check( - target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None + target_obj: Any, + schema_obj: Union[str, JSONSchema], + desired_temperature: Optional[float] = None, ): if isinstance(schema_obj, str): schema_obj = json_loads(schema_obj) @@ -26,9 +30,7 @@ def generate_and_check( # Now test that the grammar can recognize and generate prepared_json # We partial in the grammar_callable if desired_temperature is not None: - grammar_callable = partial( - gen_json, schema=schema_obj, temperature=desired_temperature - ) + grammar_callable = partial(gen_json, schema=schema_obj, temperature=desired_temperature) else: grammar_callable = partial(gen_json, schema=schema_obj) @@ -55,4 +57,4 @@ def check_match_failure( failure_byte=failure_byte, allowed_bytes=allowed_bytes, grammar=grammar, - ) \ No newline at end of file + ) From 8a7dfb6ed51e7c2c428ca7fae23c5e0db0ae9150 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 09:50:10 -0700 Subject: [PATCH 38/70] drop unnecessary typing import from test --- tests/unit/library/json/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py index ffbbe3b5f..5498d718c 100644 --- a/tests/unit/library/json/utils.py +++ b/tests/unit/library/json/utils.py @@ -2,7 +2,7 @@ from functools import partial from json import dumps as json_dumps from json import loads as 
json_loads -from typing import Any, Optional, Set, Union +from typing import Any, Optional, Union from jsonschema import validate @@ -46,7 +46,7 @@ def check_match_failure( bad_string: str, good_bytes: Optional[bytes] = None, failure_byte: Optional[bytes] = None, - allowed_bytes: Optional[Set[bytes]] = None, + allowed_bytes: Optional[set[bytes]] = None, schema_obj: Union[str, JSONSchema], ): grammar = gen_json(schema=schema_obj) From ce18dc469b351fde831fe68be8e5767d1a48bdb9 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 09:51:27 -0700 Subject: [PATCH 39/70] drop some more unnecessary imports --- guidance/library/_json.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index cb6ea3aff..a11a469a6 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -4,7 +4,6 @@ from typing import ( Any, Callable, - Dict, Mapping, Optional, Sequence, @@ -32,10 +31,10 @@ def urijoin(base: str, uri: str) -> str: raise from .._guidance import guidance -from ..library import char_range, gen, one_or_more, optional, sequence +from ..library import optional, sequence from ..library._regex_utils import rx_int_range, rx_float_range -from .._grammar import GrammarFunction, select, capture, with_temperature, Not, And, quote_regex +from .._grammar import GrammarFunction, select, with_temperature, Not, And, quote_regex from ._pydantic import pydantic_to_json_schema from ._subgrammar import as_regular_grammar, lexeme, subgrammar From 7af4401d9cf8ce7d3de08ee64690079ba50f47b1 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 12:43:20 -0700 Subject: [PATCH 40/70] raise UnsatisfiableSchemaError whenever allOf leads to conflicting constraints --- guidance/library/_json.py | 128 ++++++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 54 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index a11a469a6..5afdb1bd1 100644 --- 
a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -16,19 +16,7 @@ import referencing from collections import defaultdict import urllib.parse - -def urijoin(base: str, uri: str) -> str: - # Special case for fragment-only URIs - if uri.startswith("#"): - return f"{base}{uri}" - return urllib.parse.urljoin(base, uri) - -try: - import jsonschema - import pydantic -except ImportError: - if TYPE_CHECKING: - raise +import functools from .._guidance import guidance from ..library import optional, sequence @@ -38,12 +26,14 @@ def urijoin(base: str, uri: str) -> str: from ._pydantic import pydantic_to_json_schema from ._subgrammar import as_regular_grammar, lexeme, subgrammar -JSONSchema = Union[bool, dict[str, Any]] +try: + import jsonschema + import pydantic +except ImportError: + if TYPE_CHECKING: + raise -class Unset(Enum): - # https://peps.python.org/pep-0484/#support-for-singleton-types-in-unions - token = 0 -_unset = Unset.token +JSONSchema = Union[bool, dict[str, Any]] DRAFT202012_RESERVED_KEYWORDS = { # Anchors and References @@ -388,6 +378,14 @@ class ObjectKeywords(str, Enum): "unknown": r"(?s:.*)", } + +def urijoin(base: str, uri: str) -> str: + # Special case for fragment-only URIs + if uri.startswith("#"): + return f"{base}{uri}" + return urllib.parse.urljoin(base, uri) + + def _get_format_pattern(format: str) -> str: try: pattern = FORMAT_PATTERNS[format] @@ -413,6 +411,9 @@ def get_sibling_keys(node: Mapping[str, Any], key: str) -> set[str]: return set(node.keys()) & VALID_KEYS - set(IGNORED_KEYS) - {key} +class UnsatisfiableSchemaError(ValueError): + pass + class GenJson: item_separator = ", " key_separator = ": " @@ -728,22 +729,17 @@ def allOf( parent_schema: JSONSchema, base_uri: str, ): - type = set(JSONType) + types: list[set[JSONType]] = [] properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) required: dict[str, None] = dict() # use a dict for ordered-set behavior additional_properties_list: list[tuple[JSONSchema, set[str]]] 
= [] prefix_items: defaultdict[int, list[JSONSchema]] = defaultdict(list) items_list: list[tuple[JSONSchema, set[int]]] = [] other_data: dict[str, Any] = {} - enum: Optional[list[Any]] = None - const: Union[Unset, Any] = _unset + enums: list[Sequence[Any]] = [] + consts: list[Any] = [] def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri: str): - nonlocal type - nonlocal required - nonlocal const - nonlocal enum - if key == Keyword.REF: ref = cast(str, value) abspath = urijoin(base_uri, ref) @@ -751,37 +747,19 @@ def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri add_schema(resolved.contents, base_uri=resolved.resolver._base_uri) elif key == Keyword.CONST: - if const is not _unset and const != value: - raise ValueError(f"allOf with multiple conflicting const values: {const!r} and {value!r}") - const = value + consts.append(value) elif key == Keyword.ENUM: value = cast(list[Any], value) - if enum is not None: - try: - enum = list(set(enum) & set(value)) - except TypeError: - # Check on equality, not on hash - # Yes, this is O(n^2). - # Hope the items were unique. 
- # ¯\_(ツ)_/¯ - enum = [a for a in enum for b in value if a == b] - else: - enum = value + enums.append(value) elif key == Keyword.TYPE: - value = cast(Union[str, list[str]], value) + value = cast(Union[str, Sequence[str]], value) if isinstance(value, str): value_set = {value} else: value_set = set(value) - if JSONType.NUMBER in value_set: - # Number implies integer - value_set.add(JSONType.INTEGER) - type &= value_set - # Throw an error early if we have conflicting types - if not type: - raise ValueError("allOf with conflicting types") + types.append(value_set) elif key == Keyword.ALLOF: value = cast(Sequence[JSONSchema], value) @@ -857,7 +835,7 @@ def add_schema(schema: JSONSchema, base_uri: str): if schema is True: return if schema is False: - raise ValueError("allOf contains a False schema") + raise UnsatisfiableSchemaError("allOf contains a 'false' schema") # Validate the schema's keys (we have only validated the parent schema's keys so far) # TODO: This will make us validate the parent twice... 
should probably be refactored @@ -876,9 +854,7 @@ def add_schema(schema: JSONSchema, base_uri: str): add_schema(parent_schema, base_uri) - combined_schema: dict[str, Any] = { - Keyword.TYPE: list(type), - } + combined_schema: dict[str, Any] = {} # Post-process additional_properties to make sure we apply the additional properties of one # schema to the properties of another schema @@ -898,13 +874,16 @@ def add_schema(schema: JSONSchema, base_uri: str): combined_schema[ObjectKeywords.PROPERTIES][name] = schemas[0] else: combined_schema[ObjectKeywords.PROPERTIES][name] = {"allOf": schemas} + if required: combined_schema[ObjectKeywords.REQUIRED] = list(required.keys()) + if additional_properties_list: if len(additional_properties_list) == 1: combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES], _ = additional_properties_list[0] else: combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": [schema for schema, _ in additional_properties_list]} + if prefix_items: combined_schema[ArrayKeywords.PREFIX_ITEMS] = [] for i in range(len(prefix_items)): @@ -913,16 +892,57 @@ def add_schema(schema: JSONSchema, base_uri: str): combined_schema[ArrayKeywords.PREFIX_ITEMS].append(schemas[0]) else: combined_schema[ArrayKeywords.PREFIX_ITEMS].append({"allOf": schemas}) + if items_list: if len(items_list) == 1: combined_schema[ArrayKeywords.ITEMS], _ = items_list[0] else: combined_schema[ArrayKeywords.ITEMS] = {"allOf": [schema for schema, _ in items_list]} - if enum is not None: + + if enums: + if len(enums) == 1: + enum = enums[0] + else: + def reduce_enums(enum_a, enum_b): + try: + enum = list(set(enum_a) & set(enum_b)) + except TypeError: + # Check on equality, not on hash + # Yes, this is O(n^2). + # Hope the items were unique. 
+ # ¯\_(ツ)_/¯ + enum = [a for a in enum_a for b in enum_b if a == b] + return enum + enum = functools.reduce(reduce_enums, enums[1:], enums[0]) + if not enum: + raise UnsatisfiableSchemaError(f"allOf has enums with no common values: {enums}") combined_schema[Keyword.ENUM] = enum - if const is not _unset: + + if consts: + const, *rest = consts + for c in rest: + if c != const: + raise UnsatisfiableSchemaError(f"allOf has consts with different values: {consts}") combined_schema[Keyword.CONST] = const + if types: + if len(types) == 1: + type = list(types[0]) + else: + def reduce_types(type_a: set[JSONType], type_b: set[JSONType]) -> set[JSONType]: + common_types = type_a & type_b + # Integer is a "subtype" of number, so ensure we keep integer if we have "number" in one and "integer" in the other + if JSONType.INTEGER not in common_types and ( + (JSONType.NUMBER in type_a and JSONType.INTEGER in type_b) or + (JSONType.INTEGER in type_a and JSONType.NUMBER in type_b) + ): + common_types.add(JSONType.INTEGER) + return common_types + type = list(functools.reduce(reduce_types, types[1:], types[0])) + if not type: + raise UnsatisfiableSchemaError(f"allOf has conflicting types: {types}") + combined_schema[Keyword.TYPE] = type + assert not set(combined_schema) & set(other_data) combined_schema.update(other_data) From 487282af9fd395bbfd4822127289cf3eecafa1e6 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 12:47:48 -0700 Subject: [PATCH 41/70] raise UnsatisfiableSchemaError if schema is literal false --- guidance/library/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 5afdb1bd1..927dea5be 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1058,7 +1058,7 @@ def json( if json_schema is True: json_schema = {} elif json_schema is False: - raise ValueError("No valid JSON can be generated from a schema of `False`") + raise UnsatisfiableSchemaError("No 
valid JSON can be generated from a schema of `false`") if json_schema == {}: return lm + self.any() From 6aa856722964a226fe7f8403f5df917acd13f885 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 13:26:41 -0700 Subject: [PATCH 42/70] catch UnsatisfiableSchemaError if raised when building grammar for object property; add tests that assert informative tracebacks --- guidance/library/_json.py | 44 ++++++++++++++++++--------- tests/unit/library/json/test_allOf.py | 4 +-- tests/unit/library/json/test_json.py | 23 +++++++------- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 927dea5be..031b544c3 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -551,17 +551,32 @@ def object( required: Sequence[str], base_uri: str, ): - # "required" keys will be validated against "properties" if they're present, otherwise against "additionalProperties". - # If "additionalProperties" is False, then required keys must be in "properties". - if any(k not in properties for k in required) and additional_properties is False: - raise ValueError( - f"Required properties not in properties but additionalProperties is False." 
- f" Missing required properties: {list(r for r in required if r not in properties)}" - ) + illegal_keys = set() + property_grammars: dict[str, GrammarFunction] = {} + for name, schema in properties.items(): + try: + property_grammars[name] = self.json(json_schema=schema, base_uri=base_uri) + except UnsatisfiableSchemaError as e: + # We get here if the schema is a literal False or is otherwise determined to be unsatisfiable + if name in required: + raise UnsatisfiableSchemaError(f"Required property {name!r} is unsatisfiable") from e + illegal_keys.add(name) + + additional_properties_grammar: Optional[GrammarFunction] = None + try: + additional_properties_grammar = self.json(json_schema=additional_properties, base_uri=base_uri) + except UnsatisfiableSchemaError as e: + if any(k not in properties for k in required): + # "required" keys will be validated against "properties" if they're present, otherwise against "additionalProperties". + # If "additionalProperties" is unsatisfiable, then required keys must be in "properties". + raise UnsatisfiableSchemaError( + f"Required properties not in properties but additionalProperties is unsatisfiable." 
+ f" Missing required properties: {list(r for r in required if r not in properties)}" + ) from e keys: list[str] = [] required_items: list[bool] = [] - grammars: list[GrammarFunction] = [] + item_grammars: list[GrammarFunction] = [] # First iterate over the properties in order, then iterate over any missing required keys, using additional_properties as the schema for name in (*properties, *(r for r in required if r not in properties)): # Use json_dumps to properly quote / escape the key @@ -570,7 +585,7 @@ def object( # Identify if the key is required required_items.append(name in required) # Build the grammar we'll use for this property - grammars.append(f'{key}{self.key_separator}' + self.json(json_schema=properties.get(name, additional_properties), base_uri=base_uri)) + item_grammars.append(f'{key}{self.key_separator}' + property_grammars.get(name, additional_properties_grammar)) if additional_properties is not False: # Key for additionalProperties is a json string, but we need to disallow any properties that are already defined @@ -586,13 +601,14 @@ def object( else: additional_key_grammar = self.string() - additional_item_grammar = additional_key_grammar + self.key_separator + self.json(json_schema=additional_properties, base_uri=base_uri) - additional_items_grammar = sequence(additional_item_grammar + self.item_separator) + additional_item_grammar - grammars.append(additional_items_grammar) - required_items.append(False) + if additional_properties_grammar is not None: + additional_item_grammar = additional_key_grammar + self.key_separator + additional_properties_grammar + additional_items_grammar = sequence(additional_item_grammar + self.item_separator) + additional_item_grammar + item_grammars.append(additional_items_grammar) + required_items.append(False) return lm + "{" + self._join( - elements = tuple(grammars), + elements = tuple(item_grammars), required = tuple(required_items), ) + "}" diff --git a/tests/unit/library/json/test_allOf.py 
b/tests/unit/library/json/test_allOf.py index 1a388d2ce..659171463 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -176,7 +176,7 @@ def test_allOf_with_boolean_schemas_some_false(self, test_object, valid): validate(instance=test_object, schema=schema) with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "allOf contains a False schema" + assert ve.value.args[0] == "allOf contains a 'false' schema" @pytest.mark.parametrize( ["test_object", "valid"], @@ -198,7 +198,7 @@ def test_allOf_with_boolean_schemas_all_false(self, test_object, valid): validate(instance=test_object, schema=schema) with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "allOf contains a False schema" + assert ve.value.args[0] == "allOf contains a 'false' schema" @pytest.mark.parametrize( ["test_object", "valid"], diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index f3602fddd..724369a3e 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -707,8 +707,9 @@ def test_false_additionalProperties_fails(self): _ = gen_json(schema=schema) assert ( ve.value.args[0] - == "Required properties not in properties but additionalProperties is False. Missing required properties: ['b', 'c']" + == "Required properties not in properties but additionalProperties is unsatisfiable. 
Missing required properties: ['b', 'c']" ) + assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" class TestSimpleArray: @@ -1227,7 +1228,7 @@ def test_allOf_bad_schema(self): schema = {"allOf": [{"type": "integer"}, {"type": "string"}]} with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "allOf with conflicting types" + assert ve.value.args[0] == "allOf has conflicting types: [{'integer'}, {'string'}]" class TestOneOf: @@ -2143,18 +2144,18 @@ def test_true_schema(self, target_obj): schema_obj = True generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize( - "schema_obj", - [ - False, - {"type": "object", "properties": {"a": False}, "required": ["a"]}, - ], - ) - def test_false_schema(self, schema_obj): + def test_false_schema(self): + schema_obj = False with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) - assert ve.value.args[0] == "No valid JSON can be generated from a schema of `False`" + assert ve.value.args[0] == "No valid JSON can be generated from a schema of `false`" + def test_false_required_property(self): + schema_obj = {"type": "object", "properties": {"a": False}, "required": ["a"]} + with pytest.raises(ValueError) as ve: + gen_json(schema=schema_obj) + assert ve.value.args[0] == "Required property 'a' is unsatisfiable" + assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" class TestWhitespace: seps = [ From 372bd3df3a50da14b096dc2d83560eec800c4642 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 13:51:18 -0700 Subject: [PATCH 43/70] raise more UnsatisfiableSchemaErrors if min > max for string, number, array --- guidance/library/_json.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 031b544c3..593b6aa56 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -411,6 +411,16 @@ 
def get_sibling_keys(node: Mapping[str, Any], key: str) -> set[str]: return set(node.keys()) & VALID_KEYS - set(IGNORED_KEYS) - {key} +def check_number_bounds(minimum: Union[float, int, None], maximum: Union[float, int, None], exclusiveMinimum: bool, exclusiveMaximum: bool): + if minimum is not None and maximum is not None: + if minimum > maximum: + raise UnsatisfiableSchemaError(f"Number minimum ({minimum}) is greater than maximum ({maximum})") + if minimum == maximum and (exclusiveMinimum or exclusiveMaximum): + minimum_repr = f"exclusiveMinimum {minimum}" if exclusiveMinimum else f"minimum {minimum}" + maximum_repr = f"exclusiveMaximum {maximum}" if exclusiveMaximum else f"maximum {maximum}" + raise UnsatisfiableSchemaError(f"Number {minimum_repr} is equal to {maximum_repr}") + + class UnsatisfiableSchemaError(ValueError): pass @@ -471,6 +481,8 @@ def root(self, lm): @classmethod @guidance(stateless=True) def integer(cls, lm, minimum: Union[float, int, None] = None, maximum: Union[float, int, None] = None, exclusiveMinimum: bool = False, exclusiveMaximum: bool = False): + check_number_bounds(minimum, maximum, exclusiveMinimum, exclusiveMaximum) + if minimum is not None: if exclusiveMinimum: if minimum != int(minimum): @@ -496,6 +508,8 @@ def integer(cls, lm, minimum: Union[float, int, None] = None, maximum: Union[flo @classmethod @guidance(stateless=True) def number(cls, lm, minimum: Optional[float] = None, maximum: Optional[float] = None, exclusiveMinimum: bool = False, exclusiveMaximum: bool = False): + check_number_bounds(minimum, maximum, exclusiveMinimum, exclusiveMaximum) + return lm + lexeme( rx_float_range( minimum, maximum, @@ -517,6 +531,9 @@ def string( regex: Union[str, None] = None, format: Union[str, None] = None, ): + if min_length is not None and max_length is not None and min_length > max_length: + raise UnsatisfiableSchemaError(f"String minLength ({min_length}) is greater than maxLength ({max_length})") + if (regex is not None or format is not 
None) and (min_length > 0 or max_length is not None): raise ValueError( "If a pattern or format is specified for a JSON string, minLength and maxLength must be left unspecified." @@ -651,6 +668,9 @@ def array( max_items: Optional[int], base_uri: str, ): + if max_items is not None and min_items > max_items: + raise UnsatisfiableSchemaError(f"minItems ({min_items}) is greater than maxItems ({max_items})") + if len(prefix_items_schema) < min_items and item_schema is False: raise ValueError( f"PrefixItems has too few elements ({len(prefix_items_schema)}) to" From 7149e87fcb0380a34253dd413f36ef2317eebda0 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 13:54:35 -0700 Subject: [PATCH 44/70] add illegal keys to not expression of additonal_key_grammar --- guidance/library/_json.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 593b6aa56..70c144a14 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -577,7 +577,9 @@ def object( # We get here if the schema is a literal False or is otherwise determined to be unsatisfiable if name in required: raise UnsatisfiableSchemaError(f"Required property {name!r} is unsatisfiable") from e - illegal_keys.add(name) + # Use json_dumps to properly quote / escape the key + key = json_dumps(name) + illegal_keys.add(key) additional_properties_grammar: Optional[GrammarFunction] = None try: @@ -611,7 +613,7 @@ def object( additional_key_grammar = as_regular_grammar( And([ lexeme(r'"([^"\\]|\\["\\/bfnrt]|\\u[0-9a-fA-F]{4})*"'), - Not(lexeme('|'.join(map(quote_regex, keys)))), + Not(lexeme('|'.join(map(quote_regex, (*keys, *illegal_keys))))), ]), lexeme = True, ) @@ -677,9 +679,6 @@ def array( f" satisfy minItems ({min_items}) but no extra items were allowed" ) - if max_items is not None and max_items < min_items: - raise ValueError(f"maxItems ({max_items}) can't be less than minItems ({min_items})") - required_items = [] 
optional_items = [] From 614d29f55f789bc37c9e4878069883b20f3c1f78 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 14:10:09 -0700 Subject: [PATCH 45/70] mypy --- guidance/library/_json.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 70c144a14..a515f9276 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -604,7 +604,7 @@ def object( # Identify if the key is required required_items.append(name in required) # Build the grammar we'll use for this property - item_grammars.append(f'{key}{self.key_separator}' + property_grammars.get(name, additional_properties_grammar)) + item_grammars.append(f'{key}{self.key_separator}' + property_grammars.get(name, cast(GrammarFunction, additional_properties_grammar))) if additional_properties is not False: # Key for additionalProperties is a json string, but we need to disallow any properties that are already defined @@ -764,7 +764,7 @@ def allOf( parent_schema: JSONSchema, base_uri: str, ): - types: list[set[JSONType]] = [] + types: list[set[str]] = [] properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) required: dict[str, None] = dict() # use a dict for ordered-set behavior additional_properties_list: list[tuple[JSONSchema, set[str]]] = [] @@ -948,7 +948,7 @@ def reduce_enums(enum_a, enum_b): # ¯\_(ツ)_/¯ enum = [a for a in enum_a for b in enum_b if a == b] return enum - enum = functools.reduce(reduce_enums, enums[1:], enums[0]) + enum = functools.reduce(reduce_enums, enums) if not enum: raise UnsatisfiableSchemaError(f"allOf has enums with no common values: {enums}") combined_schema[Keyword.ENUM] = enum @@ -964,7 +964,7 @@ def reduce_enums(enum_a, enum_b): if len(types) == 1: type = list(types[0]) else: - def reduce_types(type_a: set[JSONType], type_b: set[JSONType]) -> set[JSONType]: + def reduce_types(type_a: set[str], type_b: set[str]) -> set[str]: common_types = type_a & type_b # 
Integer is a "subtype" of number, so ensure we keep integer if we have "number" in one and "integer" in the other if JSONType.INTEGER not in common_types and ( @@ -973,7 +973,7 @@ def reduce_types(type_a: set[JSONType], type_b: set[JSONType]) -> set[JSONType]: ): common_types.add(JSONType.INTEGER) return common_types - type = list(functools.reduce(reduce_types, types[1:], types[0])) + type = list(functools.reduce(reduce_types, types)) # type: ignore[arg-type] if not type: raise UnsatisfiableSchemaError(f"allOf has conflicting types: {types}") combined_schema[Keyword.TYPE] = type From 3b802c8d4f59160962d6ecc43d8a52ab00b78ca5 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 15:08:36 -0700 Subject: [PATCH 46/70] unsatisfiable items --- guidance/library/_json.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index a515f9276..78d813fbd 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -673,11 +673,15 @@ def array( if max_items is not None and min_items > max_items: raise UnsatisfiableSchemaError(f"minItems ({min_items}) is greater than maxItems ({max_items})") - if len(prefix_items_schema) < min_items and item_schema is False: - raise ValueError( - f"PrefixItems has too few elements ({len(prefix_items_schema)}) to" - f" satisfy minItems ({min_items}) but no extra items were allowed" - ) + items_grammar: Optional[GrammarFunction] = None + try: + items_grammar = self.json(json_schema=item_schema, base_uri=base_uri) + except UnsatisfiableSchemaError as e: + if len(prefix_items_schema) < min_items: + raise UnsatisfiableSchemaError( + f"prefixItems has too few elements ({len(prefix_items_schema)}) to satisfy minItems ({min_items})" + f" but item schema is unsatisfiable" + ) from e required_items = [] optional_items = [] @@ -686,24 +690,29 @@ def array( n_to_add = max(len(prefix_items_schema), min_items) if max_items is None 
else max_items for i in range(n_to_add): if i < len(prefix_items_schema): - schema = prefix_items_schema[i] - elif item_schema is not False: - schema = item_schema + try: + item = self.json(json_schema=prefix_items_schema[i], base_uri=base_uri) + except UnsatisfiableSchemaError as e: + # i corresponds to the number of items we've already satisfied + if i < min_items: + raise UnsatisfiableSchemaError(f"prefixItems[{i}] is unsatisfiable but min_items is {min_items}") from e + # Having an unsatisfiable prefix item is fine if we've already satisfied min_items, but this effectively sets max_items to i + max_items = i + break + elif items_grammar is not None: + item = items_grammar else: assert i >= min_items break - item = self.json(json_schema=schema, base_uri=base_uri) - if i < min_items: required_items.append(item) else: optional_items.append(item) - if max_items is None and item_schema is not False: + if max_items is None and items_grammar is not None: # Add an infinite tail of items - item = self.json(json_schema=item_schema, base_uri=base_uri) - optional_items.append(item + sequence(self.item_separator + item)) + optional_items.append(items_grammar + sequence(self.item_separator + items_grammar)) lm += "[" From 57c7d421be26376fc005b239ea3b01d9e42a7ba9 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 15:39:53 -0700 Subject: [PATCH 47/70] drop xfail --- tests/unit/library/json/test_allOf.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py index 659171463..4cc039d9d 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -419,21 +419,14 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): {"additionalProperties": {"type": "string"}}, ], } - try: - if valid: + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + 
else: + with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - except ValueError as ve: - if ve.args[0] == "allOf with conflicting types": - pytest.xfail( - reason="We should be returning a False schema from allOf if there is a conflict, but we currently raise an error" - ) - else: - raise + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + @pytest.mark.parametrize( "test_object, valid", From faa3fe7de23553e1209b9d3ced3c7b8282065a09 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:16:55 -0700 Subject: [PATCH 48/70] exception string --- guidance/library/_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 78d813fbd..bc65e8a7c 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -416,8 +416,8 @@ def check_number_bounds(minimum: Union[float, int, None], maximum: Union[float, if minimum > maximum: raise UnsatisfiableSchemaError(f"Number minimum ({minimum}) is greater than maximum ({maximum})") if minimum == maximum and (exclusiveMinimum or exclusiveMaximum): - minimum_repr = f"exclusiveMinimum {minimum}" if exclusiveMinimum else f"minimum {minimum}" - maximum_repr = f"exclusiveMaximum {maximum}" if exclusiveMaximum else f"maximum {maximum}" + minimum_repr = f"exclusiveMinimum ({minimum})" if exclusiveMinimum else f"minimum ({minimum})" + maximum_repr = f"exclusiveMaximum ({maximum})" if exclusiveMaximum else f"maximum ({maximum})" raise UnsatisfiableSchemaError(f"Number {minimum_repr} is equal to {maximum_repr}") From 01039e2b2b4611cf4b43b0adf6b60c240a7d01ae Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:31:49 -0700 Subject: [PATCH 49/70] 
UnsatisfiableSchemaError for empty oneOf, anyOf --- guidance/library/_json.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index bc65e8a7c..55f63056b 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -749,6 +749,8 @@ def anyOf( anyof_list: Sequence[JSONSchema], base_uri: str, ): + if not anyof_list: + raise UnsatisfiableSchemaError("anyOf has no schemas") options = [self.json(json_schema=item, base_uri=base_uri) for item in anyof_list] return lm + select(options) @@ -760,6 +762,8 @@ def oneOf( oneof_list: Sequence[JSONSchema], base_uri: str, ): + if not oneof_list: + raise UnsatisfiableSchemaError("oneOf has no schemas") if len(oneof_list) == 1: return lm + self.json(json_schema=oneof_list[0], base_uri=base_uri) warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") From a48c9d24ef64fcc2ae9e26bd923d6a5a0fe62a60 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:38:21 -0700 Subject: [PATCH 50/70] test unsatisfiable integer --- guidance/library/_json.py | 4 ++-- tests/unit/library/json/test_json.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 55f63056b..3f70fde9c 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -414,11 +414,11 @@ def get_sibling_keys(node: Mapping[str, Any], key: str) -> set[str]: def check_number_bounds(minimum: Union[float, int, None], maximum: Union[float, int, None], exclusiveMinimum: bool, exclusiveMaximum: bool): if minimum is not None and maximum is not None: if minimum > maximum: - raise UnsatisfiableSchemaError(f"Number minimum ({minimum}) is greater than maximum ({maximum})") + raise UnsatisfiableSchemaError(f"minimum ({minimum}) is greater than maximum ({maximum})") if minimum == maximum and (exclusiveMinimum or exclusiveMaximum): minimum_repr = 
f"exclusiveMinimum ({minimum})" if exclusiveMinimum else f"minimum ({minimum})" maximum_repr = f"exclusiveMaximum ({maximum})" if exclusiveMaximum else f"maximum ({maximum})" - raise UnsatisfiableSchemaError(f"Number {minimum_repr} is equal to {maximum_repr}") + raise UnsatisfiableSchemaError(f"{minimum_repr} is equal to {maximum_repr}") class UnsatisfiableSchemaError(ValueError): diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 724369a3e..1984b9478 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -2,6 +2,7 @@ from json import dumps as json_dumps import pytest +import re from jsonschema import ValidationError, validate from guidance import json as gen_json @@ -76,6 +77,21 @@ def test_bad_integer(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) + @pytest.mark.parametrize( + "schema", + [ + {"type": "integer", "minimum": 5, "maximum": 4}, + {"type": "integer", "minimum": 5, "exclusiveMaximum": 5}, + {"type": "integer", "exclusiveMinimum": 5, "maximum": 5}, + ] + ) + def test_unsatisfiable_min_max(self, schema): + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert re.fullmatch( + r"(exclusiveMinimum|minimum) \(5\) is (greater than|equal to) (exclusiveMaximum|maximum) \((4|5)\)", + ve.value.args[0] + ) class TestNumber: schema = """{"type": "number" }""" From 820b8ba1ca398e6600f36ed616b5403e63bded4a Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:39:33 -0700 Subject: [PATCH 51/70] test unsatisfiable number --- tests/unit/library/json/test_json.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 1984b9478..13291b080 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -146,6 +146,21 @@ def test_bad_number(self, bad_string, good_bytes, failure_byte, 
allowed_bytes): schema_obj=schema_obj, ) + @pytest.mark.parametrize( + "schema", + [ + {"type": "integer", "minimum": 5, "maximum": 4}, + {"type": "integer", "minimum": 5, "exclusiveMaximum": 5}, + {"type": "integer", "exclusiveMinimum": 5, "maximum": 5}, + ] + ) + def test_unsatisfiable_min_max(self, schema): + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert re.fullmatch( + r"(exclusiveMinimum|minimum) \(5\) is (greater than|equal to) (exclusiveMaximum|maximum) \((4|5)\)", + ve.value.args[0] + ) class TestBoundedNumeric: @pytest.mark.parametrize( From 2769f25d8e00e9f4b600307184b6e3429097a8fe Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:39:52 -0700 Subject: [PATCH 52/70] test unsatisfiable string --- tests/unit/library/json/test_json.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 13291b080..3118ffe3d 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -571,6 +571,12 @@ def test_maxLength_bad(self, bad_string: str, good_bytes, failure_byte, allowed_ schema_obj=schema_obj, ) + def test_unsatisfiable_length(self): + schema = {"type": "string", "minLength": 10, "maxLength": 5} + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "String minLength (10) is greater than maxLength (5)" + class TestSimpleObject: # These are objects without cross references From 95e8134baf7f2f8580675358df47bc0fe9941e45 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:41:53 -0700 Subject: [PATCH 53/70] test unsatisfiable array --- tests/unit/library/json/test_json.py | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 3118ffe3d..1b7a011b2 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py 
@@ -836,6 +836,57 @@ def test_bad_object(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) + def test_unsatisfiable_prefixItem_ok(self): + schema = { + "type": "array", + "prefixItems": [{"type": "integer"}, False] + } + generate_and_check([42], schema) + check_match_failure( + bad_string="[42, 43]", + good_bytes=b"[42", + failure_byte=b",", + allowed_bytes={b"]"} | INTEGER_FOLLOWING, + schema_obj=schema + ) + + def test_unsatisfiable_prefixItem_raises(self): + schema = { + "type": "array", + "prefixItems": [{"type": "integer"}, False], + "minItems": 2, + } + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "prefixItems[1] is unsatisfiable but min_items is 2" + assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + + def test_unsatisfiable_items_ok(self): + schema = { + "type": "array", + "prefixItems": [{"type": "integer"}], + "items": {"allOf": [{"type": "integer"}, False]} + } + generate_and_check([42], schema) + check_match_failure( + bad_string="[42, 43]", + good_bytes=b"[42", + failure_byte=b",", + allowed_bytes={b"]"} | INTEGER_FOLLOWING, + schema_obj=schema + ) + + def test_unsatisfiable_items_raises(self): + schema = { + "type": "array", + "prefixItems": [{"type": "integer"}], + "items": {"allOf": [{"type": "integer"}, False]}, + "minItems": 2, + } + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "prefixItems has too few elements (1) to satisfy minItems (2) but item schema is unsatisfiable" + assert ve.value.__cause__.args[0] == "allOf contains a 'false' schema" class TestArrayWithLengthConstraints: prefix_schema_obj = [{"type": "integer"}, {"type": "boolean"}] @@ -1120,6 +1171,12 @@ def test_bad_with_items( schema_obj=schema_obj, ) + def test_unsatisfiable_length(self): + schema = {"type": "array", "minItems": 10, "maxItems": 5} + with pytest.raises(ValueError) as ve: + _ = 
gen_json(schema=schema) + assert ve.value.args[0] == "minItems (10) is greater than maxItems (5)" + class TestAnyOf: @pytest.mark.parametrize("target_obj", [123, True]) From e21924df4d0ba9580a655c7740612f847a0595f5 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:49:05 -0700 Subject: [PATCH 54/70] test unsatisfiable object --- guidance/library/_json.py | 2 +- tests/unit/library/json/test_json.py | 38 ++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 3f70fde9c..3ad093c6f 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -597,7 +597,7 @@ def object( required_items: list[bool] = [] item_grammars: list[GrammarFunction] = [] # First iterate over the properties in order, then iterate over any missing required keys, using additional_properties as the schema - for name in (*properties, *(r for r in required if r not in properties)): + for name in (*property_grammars.keys(), *(r for r in required if r not in properties)): # Use json_dumps to properly quote / escape the key key = json_dumps(name) keys.append(key) diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 1b7a011b2..d5d25c79b 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -708,6 +708,44 @@ def test_bad_object(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) + def test_unsatisfiable_properties_ok(self): + schema = { + "type": "object", + "properties": {"a": {"type": "integer"}, "b": False}, + "additionalProperties": False, + } + generate_and_check({"a": 42}, schema) + check_match_failure( + bad_string=json_dumps({"a": 42, "b": 43}), + good_bytes=b'{"a": 42', + failure_byte=b",", + allowed_bytes={b"}"} | INTEGER_FOLLOWING, + schema_obj=schema, + ) + + def test_unsatisfiable_properties_raises(self): + schema = { + "type": "object", + "properties": 
{"a": {"type": "integer"}, "b": False}, + "required": ["b"], + "additionalProperties": False, + } + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "Required property 'b' is unsatisfiable" + assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + + def test_unsatisfiable_additional_properties_raises(self): + schema = { + "type": "object", + "properties": {"a": {"type": "integer"}}, + "required": ["a", "b"], + "additionalProperties": False, + } + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "Required properties not in properties but additionalProperties is unsatisfiable. Missing required properties: ['b']" + assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" class TestObjectWithMissingRequired: def test_required_is_required(self): From add4f518d874cd103718cb7a714ef67332125218 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 17:06:35 -0700 Subject: [PATCH 55/70] raise UnsatisfiableSchemaError if all anyOf subschemas are unsatisfiable --- guidance/library/_json.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 3ad093c6f..d4be005aa 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -751,7 +751,16 @@ def anyOf( ): if not anyof_list: raise UnsatisfiableSchemaError("anyOf has no schemas") - options = [self.json(json_schema=item, base_uri=base_uri) for item in anyof_list] + + options: list[GrammarFunction] = [] + for item in anyof_list: + try: + options.append(self.json(json_schema=item, base_uri=base_uri)) + except UnsatisfiableSchemaError: + pass + if not options: + # Can't really point to any one schema that's unsatisfiable, so let's include all the schemas in the error message + raise UnsatisfiableSchemaError("all anyOf schemas are unsatisfiable: " + 
json_dumps(anyof_list)) return lm + select(options) @guidance(stateless=True) From 6d35e6b12d8d9456b59410ac8074fdda96fe7b42 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 17:07:26 -0700 Subject: [PATCH 56/70] punt multi-type schemas to anyOf so it can handle unsatisfiable subschemas --- guidance/library/_json.py | 42 +++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index d4be005aa..ac1d24470 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1223,20 +1223,12 @@ def json( raise NotImplementedError(f"enum with sibling keys is not yet supported. Got {sibling_keys}") return lm + self.enum(options=json_schema[Keyword.ENUM], instance_type=json_schema.get(Keyword.TYPE, None)) - if Keyword.TYPE in json_schema: - target_types = cast(Union[str, Sequence[str]], json_schema[Keyword.TYPE]) - if isinstance(target_types, str): - target_types = [target_types] - else: - target_types = list(JSONType) - - options: list[Union[str, GrammarFunction]] = [] - option: Union[str, GrammarFunction] - for target_type in target_types: + if Keyword.TYPE in json_schema and isinstance(json_schema[Keyword.TYPE], str): + target_type = json_schema[Keyword.TYPE] if target_type == JSONType.NULL: - option = "null" + return lm + "null" elif target_type == JSONType.BOOLEAN: - option = select(["true", "false"]) + return lm + select(["true", "false"]) elif target_type in {JSONType.INTEGER, JSONType.NUMBER}: minimum = cast(Union[int, float, None], json_schema.get(NumberKeywords.MINIMUM, None)) maximum = cast(Union[int, float, None], json_schema.get(NumberKeywords.MAXIMUM, None)) @@ -1258,28 +1250,28 @@ def json( exclusive_maximum_flag = True if target_type == JSONType.INTEGER: - option = self.integer( + return lm + self.integer( minimum=minimum, maximum=maximum, exclusiveMinimum=exclusive_minimum_flag, exclusiveMaximum=exclusive_maximum_flag, ) else: - option 
= self.number( + return lm + self.number( minimum=minimum, maximum=maximum, exclusiveMinimum=exclusive_minimum_flag, exclusiveMaximum=exclusive_maximum_flag, ) elif target_type == JSONType.STRING: - option = self.string( + return lm + self.string( regex=json_schema.get(StringKeywords.PATTERN, None), format=json_schema.get(StringKeywords.FORMAT, None), min_length=json_schema.get(StringKeywords.MIN_LENGTH, 0), max_length=json_schema.get(StringKeywords.MAX_LENGTH, None), ) elif target_type == JSONType.ARRAY: - option = self.array( + return lm + self.array( prefix_items_schema=json_schema.get(ArrayKeywords.PREFIX_ITEMS, []), item_schema=json_schema.get(ArrayKeywords.ITEMS, True), min_items=json_schema.get(ArrayKeywords.MIN_ITEMS, 0), @@ -1287,7 +1279,7 @@ def json( base_uri=base_uri, ) elif target_type == JSONType.OBJECT: - option = self.object( + return lm + self.object( properties=json_schema.get(ObjectKeywords.PROPERTIES, {}), additional_properties=json_schema.get(ObjectKeywords.ADDITIONAL_PROPERTIES, True), required=json_schema.get(ObjectKeywords.REQUIRED, set()), @@ -1295,9 +1287,21 @@ def json( ) else: raise ValueError(f"Unsupported type in schema: {target_type}") - options.append(option) - return lm + select(options) + if Keyword.TYPE in json_schema: + json_schema = json_schema.copy() + target_types = cast(Sequence[JSONType], json_schema.pop(Keyword.TYPE)) + else: + target_types = list(JSONType) + + assert Keyword.TYPE not in json_schema + # Punt to anyOf if we have multiple types so that it can ignore an unsatisfiable subset + return lm + self.anyOf( + anyof_list = [ + {"type": target_type, **json_schema} for target_type in target_types + ], + base_uri=base_uri, + ) @guidance(stateless=True) From b236a2c6acffce8a5ab64669ea3175b274772042 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 17:07:44 -0700 Subject: [PATCH 57/70] test unsatisfiable anyOf --- tests/unit/library/json/test_json.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff 
--git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index d5d25c79b..3a3b553d9 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1299,6 +1299,19 @@ def test_anyOf_objects(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + def test_anyOf_unsatisfiable_ok(self): + schema = { + "anyOf": [{"type": "integer"}, False] + } + generate_and_check(3, schema) + + def test_anyOf_unsatisfiable_raises(self): + schema = { + "anyOf": [{"type": "integer", "minimum": 10, "maximum": 0}, False], + } + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == 'all anyOf schemas are unsatisfiable: [{"type": "integer", "minimum": 10, "maximum": 0}, false]' class TestAllOf: @pytest.mark.parametrize( From ce84d5369e37d987deb74875cdfefecb7d31208a Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 4 Nov 2024 09:29:04 -0800 Subject: [PATCH 58/70] Revert "blacken json tests" This reverts commit 496718ed7e8a0cc62744401d174831e03ac352f1. 
Undo blacken to reduce diff size of PR --- tests/unit/library/json/test_allOf.py | 58 ++-- tests/unit/library/json/test_json.py | 248 ++++++++++-------- tests/unit/library/json/test_refs.py | 17 +- tests/unit/library/json/test_string_format.py | 137 +++------- tests/unit/library/json/utils.py | 24 +- 5 files changed, 207 insertions(+), 277 deletions(-) diff --git a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py index 4cc039d9d..670878c71 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -6,7 +6,6 @@ from jsonschema import ValidationError, validate from guidance import json as gen_json - from .utils import check_match_failure, generate_and_check @@ -139,6 +138,7 @@ def test_allOf_simple_maximum(self, test_object, valid): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + @pytest.mark.parametrize( ["test_object", "valid"], [ @@ -357,18 +357,15 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): ({"foo": 0, "bar": 5, "baz": 4}, False), # invalid: baz is not an integer or null ({"foo": 0, "bar": 5, "baz": "quxx"}, False), - ], + ] ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "properties": { - "foo": {"type": ["integer", "null"], "maximum": 4}, - "bar": {"minimum": 5, "maximum": 5}, - }, - "additionalProperties": {"type": ["integer", "null"], "minimum": 5}, + "properties": {"foo": {"type": ["integer", "null"], "maximum": 4}, "bar": {"minimum": 5, "maximum": 5}}, + "additionalProperties": {"type": ["integer", "null"], "minimum": 5} }, # additionalProperties in parent schema { @@ -376,22 +373,16 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}} ], "properties": {"bar": {"maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"]}, + "additionalProperties": 
{"type": ["integer", "null"]} }, # additionalProperties in allOf { "allOf": [ - { - "properties": {"foo": {"maximum": 4}}, - "additionalProperties": {"minimum": 5}, - }, - { - "properties": {"bar": {"maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"]}, - }, + {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}}, + {"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}} ] }, - ], + ] ) def test_additionalProperties_in_allOf(self, schema, test_object, valid): if valid: @@ -405,19 +396,19 @@ def test_additionalProperties_in_allOf(self, schema, test_object, valid): @pytest.mark.parametrize( "test_object, valid", [ - ({}, True), # empty object is valid - ({"foo": 1}, False), # foo is not a string - ({"foo": "x"}, False), # foo is not an integer - ({"foo": True}, False), # foo is not a string or an integer - ], + ({}, True), # empty object is valid + ({"foo": 1}, False), # foo is not a string + ({"foo": "x"}, False), # foo is not an integer + ({"foo": True}, False), # foo is not a string or an integer + ] ) def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): schema = { "type": "object", "allOf": [ {"additionalProperties": {"type": "integer"}}, - {"additionalProperties": {"type": "string"}}, - ], + {"additionalProperties": {"type": "string"}} + ] } if valid: validate(instance=test_object, schema=schema) @@ -449,18 +440,15 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): ([0, 5, 4], False), # invalid: baz is not an integer or null ([0, 5, "quxx"], False), - ], + ] ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "prefixItems": [ - {"type": ["integer", "null"], "maximum": 4}, - {"minimum": 5, "maximum": 5}, - ], - "items": {"type": ["integer", "null"], "minimum": 5}, + "prefixItems": [{"type": ["integer", "null"], "maximum": 4}, {"minimum": 5, "maximum": 5}], + "items": {"type": ["integer", 
"null"], "minimum": 5} }, # items in parent schema { @@ -468,19 +456,17 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, ], "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], - "items": {"type": ["integer", "null"]}, + "items": {"type": ["integer", "null"]} + }, # items in allOf { "allOf": [ {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, - { - "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], - "items": {"type": ["integer", "null"]}, - }, + {"prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], "items": {"type": ["integer", "null"]}} ] }, - ], + ] ) def test_items_and_prefixitems_in_allOf(self, schema, test_object, valid): if valid: diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 3a3b553d9..b7a663a91 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1,22 +1,22 @@ import json -from json import dumps as json_dumps import pytest import re from jsonschema import ValidationError, validate +from json import dumps as json_dumps from guidance import json as gen_json from guidance import models -from guidance.library._json import IGNORED_KEYS +from guidance.library._json import IGNORED_KEYS from .utils import check_match_failure, generate_and_check + # Common sets of allowed_bytes INTEGER_LEADING = {b"-", b"0", *{bytes([i]) for i in range(ord("1"), ord("9") + 1)}} INTEGER_FOLLOWING = {bytes([i]) for i in range(ord("0"), ord("9") + 1)} A_to_Z = {bytes([i]) for i in range(ord("A"), ord("Z") + 1)} - def test_null(): schema = """{"type": "null" }""" @@ -171,15 +171,11 @@ class TestBoundedNumeric: (-5, {"type": "integer", "minimum": -5}, True), pytest.param( *(5.0, {"type": "integer", "minimum": 5}, True), - marks=pytest.mark.xfail( - reason="JSON technically allows trailing zeroes, but we currently don't" - ), + 
marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") ), pytest.param( *(-5.0, {"type": "integer", "minimum": -5}, True), - marks=pytest.mark.xfail( - reason="JSON technically allows trailing zeroes, but we currently don't" - ), + marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") ), (5.1, {"type": "integer", "minimum": 5}, False), (-5.1, {"type": "integer", "minimum": -5}, False), @@ -239,11 +235,7 @@ class TestBoundedNumeric: (5.1, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-9.9, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), (5.0, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, False), - ( - -10.0, - {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, - False, - ), + (-10.0, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, False), (9.9, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-5.1, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), # --- Edge cases --- @@ -284,10 +276,10 @@ class TestBoundedNumeric: (0.2999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, True), (-0.2999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, True), (0.0999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), - (-0.0999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), + (-0.0999, {"type": "number", "minimum": -.3, "maximum": -0.1}, False), (0.3001, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), (-0.3001, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), - ], + ] ) def test_numeric_validation(self, instance, schema, should_pass): # Sanity check @@ -297,7 +289,10 @@ def test_numeric_validation(self, instance, schema, should_pass): else: with pytest.raises(ValidationError): validate(instance, schema=schema) - 
check_match_failure(bad_string=json_dumps(instance), schema_obj=schema) + check_match_failure( + bad_string=json_dumps(instance), + schema_obj=schema + ) class TestString: @@ -378,7 +373,9 @@ def test_regex_bad(self, bad_string: str, good_bytes, failure_byte, allowed_byte schema_obj=schema_obj, ) - @pytest.mark.parametrize("string", ["aA\u001f", '"""']) + @pytest.mark.parametrize( + "string", ["aA\u001f", '"""'] + ) def test_regex_properly_escaped_good(self, string): schema_obj = {"type": "string", "pattern": r".{3}"} # First sanity check what we're setting up @@ -391,15 +388,13 @@ def test_regex_properly_escaped_good(self, string): [ ( '"\\u001f\\u001f\u001f', - b'"\\u001f\\u001f', # able to match the first two stringified bytes - "\u001f".encode(), # fails on a literal \x1f byte - None, # hard to write a set of allowed bytes here + b'"\\u001f\\u001f', # able to match the first two stringified bytes + '\u001f'.encode(), # fails on a literal \x1f byte + None # hard to write a set of allowed bytes here ), ], ) - def test_regex_properly_escaped_bad( - self, bad_string: str, good_bytes, failure_byte, allowed_bytes - ): + def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_byte, allowed_bytes): # Note that the strings being fed in include the double quotes required # to make them JSON strings schema_obj = {"type": "string", "pattern": r".{3}"} @@ -411,6 +406,7 @@ def test_regex_properly_escaped_bad( schema_obj=schema_obj, ) + @pytest.mark.parametrize( "my_string", ["a", "bb", "ccc", "150", ",?", ".\t\n", "(){", "aA7", "\\9O"] ) @@ -753,31 +749,25 @@ def test_required_is_required(self): generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": "xyz"}, schema) check_match_failure( - bad_string=json_dumps({"a": 1}), + bad_string=json_dumps( + {"a": 1} + ), schema_obj=schema, ) def test_validated_against_additionalProperties(self): - schema = { - "type": "object", - "properties": {"a": {"type": "integer"}}, - "required": ["b"], - 
"additionalProperties": {"type": "integer"}, - } + schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b"], "additionalProperties": {"type": "integer"}} generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": 42}, schema) check_match_failure( - bad_string=json_dumps({"a": 1, "b": "string"}), + bad_string=json_dumps( + {"a": 1, "b": "string"} + ), schema_obj=schema, ) def test_false_additionalProperties_fails(self): - schema = { - "type": "object", - "properties": {"a": {"type": "integer"}}, - "required": ["b", "c"], - "additionalProperties": False, - } + schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b", "c"], "additionalProperties": False} with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ( @@ -850,6 +840,7 @@ def test_object_list(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( ["bad_string", "good_bytes", "failure_byte", "allowed_bytes"], [ @@ -1010,6 +1001,7 @@ def test_good_with_items(self, min_items, max_items, target_obj): } generate_and_check(target_obj, schema_obj) + @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1090,6 +1082,7 @@ def test_bad_with_prefix_and_items( schema_obj=schema_obj, ) + @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1154,6 +1147,7 @@ def test_bad_with_prefix( schema_obj=schema_obj, ) + @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1370,12 +1364,13 @@ def test_allOf_ref(self): generate_and_check(target_obj, schema_obj) def test_allOf_bad_schema(self): - schema = {"allOf": [{"type": "integer"}, {"type": "string"}]} + schema = { + "allOf" : [{ "type": "integer" }, { "type": "string" }] + } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) 
assert ve.value.args[0] == "allOf has conflicting types: [{'integer'}, {'string'}]" - class TestOneOf: @pytest.mark.parametrize("target_obj", [123, 42]) def test_oneOf_simple(self, target_obj): @@ -1390,6 +1385,7 @@ def test_oneOf_simple(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) + @pytest.mark.parametrize("target_obj", [123, True]) def test_oneOf_compound(self, target_obj): schema = """{ @@ -1427,6 +1423,7 @@ def test_enum(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1446,6 +1443,7 @@ def test_bad_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1473,10 +1471,13 @@ def test_bad_prefix_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes) ("2", False), ("1", False), (True, False), - ], + ] ) def test_typed_enum_single_type(self, obj, valid): - schema_obj = {"enum": [1, "2", True], "type": "integer"} + schema_obj = { + "enum": [1, "2", True], + "type": "integer" + } if valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1493,10 +1494,13 @@ def test_typed_enum_single_type(self, obj, valid): ("2", True), ("1", False), (True, False), - ], + ] ) def test_typed_enum_multiple_types(self, obj, valid): - schema_obj = {"enum": [1, "2", True], "type": ["integer", "string"]} + schema_obj = { + "enum": [1, "2", True], + "type": ["integer", "string"] + } if valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1506,12 +1510,14 @@ def test_typed_enum_multiple_types(self, obj, valid): check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj) def test_invalid_typed_enum(self): - schema_obj = {"enum": [1, "2"], "type": "boolean"} + schema_obj = { + "enum": [1, "2"], + 
"type": "boolean" + } with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "No valid options found for enum with type 'boolean': [1, '2']" - class TestConst: def test_constant_int(self): # First sanity check what we're setting up @@ -1571,29 +1577,45 @@ def test_constant_precedence(self): ) def test_valid_typed_const(self): - schema_obj = {"const": 1, "type": "integer"} + schema_obj = { + "const": 1, + "type": "integer" + } target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_typed_const(self): - schema_obj = {"const": 1, "type": "boolean"} + schema_obj = { + "const": 1, + "type": "boolean" + } with pytest.raises(ValidationError): gen_json(schema=schema_obj) def test_valid_enum_const(self): - schema_obj = {"const": 1, "enum": [1, 2, 3]} + schema_obj = { + "const": 1, + "enum": [1, 2, 3] + } target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_enum_const(self): - schema_obj = {"const": 1, "enum": [2, 3]} + schema_obj = { + "const": 1, + "enum": [2, 3] + } with pytest.raises(ValidationError): gen_json(schema=schema_obj) def test_valid_typed_enum_const(self): - schema_obj = {"const": 1, "enum": [1, "2", 3], "type": "integer"} + schema_obj = { + "const": 1, + "enum": [1, "2", 3], + "type": "integer" + } target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) @@ -1601,13 +1623,17 @@ def test_valid_typed_enum_const(self): @pytest.mark.parametrize( "const", [ - "2", # right enum, wrong type - 2, # wrong enum, right type - "3", # wrong enum, wrong type - ], + "2", # right enum, wrong type + 2, # wrong enum, right type + "3", # wrong enum, wrong type + ] ) def test_invalid_typed_enum_const(self, const): - schema_obj = {"const": const, "enum": [1, "2", 3], "type": "integer"} + schema_obj = { + "const": const, + "enum": [1, "2", 3], + "type": 
"integer" + } with pytest.raises(ValidationError): gen_json(schema=schema_obj) @@ -1655,15 +1681,11 @@ def test_simple_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ - ( - {"a": "1"}, - b'{"a": ', - b'"', - INTEGER_LEADING, - ), + ({"a": "1"}, b'{"a": ', b'"', INTEGER_LEADING, ), ( {"a": 1, "b": 1.5}, b'{"a": 1, "b": 1', @@ -1683,7 +1705,9 @@ def test_simple_bad_type(self, bad_obj, good_bytes, failure_byte, allowed_bytes) schema_obj=schema_obj, ) - @pytest.mark.parametrize("target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}]) + @pytest.mark.parametrize( + "target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}] + ) def test_anyOf_additional_properties(self, target_obj): # First sanity check what we're setting up schema_obj = json.loads(self.anyOf_schema) @@ -1692,6 +1716,7 @@ def test_anyOf_additional_properties(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1733,6 +1758,7 @@ def test_properties_and_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1741,7 +1767,9 @@ def test_properties_and_additional_properties(self, target_obj, temperature): ({"a": 1, "b": 2}, b'{"', b"a", {b"m"}), ], ) - def test_combined_missing_properties(self, bad_obj, good_bytes, failure_byte, allowed_bytes): + def test_combined_missing_properties( + self, bad_obj, good_bytes, failure_byte, allowed_bytes + ): schema_obj = json.loads(self.combined_schema) bad_string = json_dumps(bad_obj) check_match_failure( @@ -1752,6 +1780,7 @@ def test_combined_missing_properties(self, bad_obj, good_bytes, 
failure_byte, al schema_obj=schema_obj, ) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1880,6 +1909,7 @@ def test_empty_schema(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( "bad_string, good_bytes, failure_byte, allowed_bytes", [ @@ -1908,7 +1938,9 @@ def test_empty_schema(self, target_obj, temperature): ), ], ) - def test_bad_empty_schema(self, bad_string, good_bytes, failure_byte, allowed_bytes): + def test_bad_empty_schema( + self, bad_string, good_bytes, failure_byte, allowed_bytes + ): schema_obj = json.loads(self.empty_schema) check_match_failure( bad_string=bad_string, @@ -1924,12 +1956,7 @@ def test_bad_empty_schema(self, bad_string, good_bytes, failure_byte, allowed_by # Empty property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - { - "type": "object", - "properties": {"a": {"$ref": "#/$defs/A"}}, - "$defs": {"A": {}}, - "required": ["a"], - }, + {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, ], ) @pytest.mark.parametrize( @@ -1960,14 +1987,10 @@ def test_nested_empty_schema(self, schema_obj, target_obj, temperature): # Empty property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - { - "type": "object", - "properties": {"a": {"$ref": "#/$defs/A"}}, - "$defs": {"A": {}}, - "required": ["a"], - }, + {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, ], ) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -2010,6 +2033,7 @@ def test_nested_empty_schema_with_props(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -2044,6 +2068,7 @@ def 
test_items(self, schema_obj): [1, 0.4, "hello", False, None, {"a": 42}, [1, 2, 3, "four"]], schema_obj ) + def test_no_items(self): schema_obj = {"type": "array", "items": False} check_match_failure( @@ -2076,6 +2101,7 @@ def test_additionalProperties(self, schema_obj): schema_obj, ) + def test_no_additionalProperties(self): schema_obj = {"type": "object", "additionalProperties": False} check_match_failure( @@ -2086,17 +2112,17 @@ def test_no_additionalProperties(self): schema_obj=schema_obj, ) - def test_ignored_keys_allowed_as_properties(): schema_obj = { "type": "object", - "properties": {key: {"type": "string"} for key in IGNORED_KEYS}, + "properties": { + key: {"type": "string"} for key in IGNORED_KEYS + }, "required": list(IGNORED_KEYS), } target_obj = {key: "value" for key in IGNORED_KEYS} generate_and_check(target_obj, schema_obj) - class TestRequiredProperties: schema_obj = { "type": "object", @@ -2105,19 +2131,10 @@ class TestRequiredProperties: "b": {"type": "number"}, "c": {"type": "boolean"}, }, - "additionalProperties": True, + "additionalProperties": True } ALL_REQUIRED = ["a", "b", "c"] - SOME_REQUIRED_SUBSETS = [ - [], - ["a"], - ["b"], - ["c"], - ["a", "b"], - ["a", "c"], - ["b", "c"], - ["a", "b", "c"], - ] + SOME_REQUIRED_SUBSETS = [[], ["a"], ["b"], ["c"], ["a", "b"], ["a", "c"], ["b", "c"], ["a", "b", "c"]] NONE_REQUIRED: list[str] = [] @pytest.mark.parametrize( @@ -2126,7 +2143,7 @@ class TestRequiredProperties: {}, {"d": "hello"}, {"d": 42, "e": True}, - ], + ] ) def test_all_required_good(self, extra_items): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2146,7 +2163,7 @@ def test_all_required_good(self, extra_items): ({"c": True}), # Missing all ({}), - ], + ] ) def test_all_required_bad(self, bad_obj): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2161,7 +2178,7 @@ def test_all_required_bad(self, bad_obj): {}, {"d": "hello"}, {"d": 42, "e": True}, - ], + ] ) @pytest.mark.parametrize( "required", 
@@ -2199,7 +2216,7 @@ def test_some_required_bad(self, required): {}, {"d": "hello"}, {"d": 42, "e": True}, - ], + ] ) @pytest.mark.parametrize( "target_obj", @@ -2212,48 +2229,55 @@ def test_some_required_bad(self, required): {"a": "hello", "c": True}, {"b": 42, "c": True}, {"a": "hello", "b": 42, "c": True}, - ], + ] ) def test_none_required(self, target_obj, extra_items): schema_obj = {**self.schema_obj, "required": self.NONE_REQUIRED} generate_and_check({**target_obj, **extra_items}, schema_obj) - class TestRequiredPropertiesScaling: - @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) + @pytest.mark.parametrize( + "num_properties", + [1, 2, 3, 4, 5, 10, 20, 50, 100] + ) def test_many_optional_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, - "required": [], # Empty should be worst-case scenario + "properties": { + f"prop_{i}": {"type": "string"} for i in range(num_properties) + }, + "required": [] # Empty should be worst-case scenario } from guidance.library._json import GenJson - genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() cache_info = genjson._join.__wrapped__.cache_info() # Theoretical number of cache misses under the current implementation - expected_misses = 2 * num_properties - 1 - MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? + expected_misses = 2*num_properties - 1 + MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? assert 0 < cache_info.misses <= expected_misses + MISSES_MAGIC_NUMBER # NOTE: that if the cache maxsize is hit, the number of misses will be more than expected # Theoretical number of total calls under the current implementation - expected_calls = num_properties * (num_properties - 1) // 2 - CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? 
+ expected_calls = num_properties*(num_properties - 1) // 2 + CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? assert 0 < cache_info.hits + cache_info.misses <= expected_calls + CALLS_MAGIC_NUMBER - @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) + @pytest.mark.parametrize( + "num_properties", + [1, 2, 3, 4, 5, 10, 20, 50, 100] + ) def test_all_required_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, - "required": [f"prop_{i}" for i in range(num_properties)], + "properties": { + f"prop_{i}": {"type": "string"} for i in range(num_properties) + }, + "required": [f"prop_{i}" for i in range(num_properties)] } from guidance.library._json import GenJson - genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() @@ -2281,7 +2305,7 @@ class TestBooleanSchema: {"a": [1, 2, 3]}, {"a": {"b": 1}}, False, - True, + True ], ) def test_true_schema(self, target_obj): @@ -2319,7 +2343,7 @@ class TestWhitespace: ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ], + ] ) @pytest.mark.parametrize( "separators", @@ -2345,7 +2369,7 @@ def test_separators(self, separators, schema, obj): ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ], + ] ) @pytest.mark.parametrize( "separators", diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py index f2248129d..fd1136058 100644 --- a/tests/unit/library/json/test_refs.py +++ b/tests/unit/library/json/test_refs.py @@ -1,10 +1,9 @@ -from json import dumps as json_dumps - import pytest from 
jsonschema import ValidationError, validate -from .utils import check_match_failure, generate_and_check +from json import dumps as json_dumps +from .utils import check_match_failure, generate_and_check class TestRefs: @pytest.mark.parametrize( @@ -439,15 +438,9 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( # invalid on inner field ({"bar": "a", "foo": {"bar": 1}}, False), # invalid on outer field - ({"bar": 1, "foo": {"bar": "a"}}, False), + ({ "bar": 1, "foo": {"bar": "a"}}, False), # valid on both fields - ( - { - "bar": "a", - "foo": {"bar": "a"}, - }, - True, - ), + ({"bar": "a", "foo": {"bar": "a"}, }, True), ], ) def test_refs_with_relative_uris_and_defs(self, test_object, valid): @@ -981,4 +974,4 @@ def test_empty_tokens_in_ref_json_pointer(self, test_object, valid): else: with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) \ No newline at end of file diff --git a/tests/unit/library/json/test_string_format.py b/tests/unit/library/json/test_string_format.py index 7b2dd9bdc..09712fb45 100644 --- a/tests/unit/library/json/test_string_format.py +++ b/tests/unit/library/json/test_string_format.py @@ -1,8 +1,7 @@ """Adapted from https://github.com/json-schema-org/JSON-Schema-Test-Suite/tree/9fc880bfb6d8ccd093bc82431f17d13681ffae8e/tests/draft2020-12/optional/format""" -import json - import pytest +import json from .utils import check_match_failure, generate_and_check @@ -45,35 +44,17 @@ def test_good(self, target_str): "bad_str", [ '"2020-01-32"', # a invalid date string with 32 days in January - pytest.param( - '"2021-02-29"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # a invalid date string with 29 days in February (normal) - pytest.param( - '"2020-02-30"', - marks=pytest.mark.xfail(reason="number of days not yet 
tied to month"), - ), # a invalid date string with 30 days in February (leap) + pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 29 days in February (normal) + pytest.param('"2020-02-30"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 30 days in February (leap) '"2020-03-32"', # a invalid date string with 32 days in March - pytest.param( - '"2020-04-31"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # a invalid date string with 31 days in April + pytest.param('"2020-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in April '"2020-05-32"', # a invalid date string with 32 days in May - pytest.param( - '"2020-06-31"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # a invalid date string with 31 days in June + pytest.param('"2020-06-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in June '"2020-07-32"', # a invalid date string with 32 days in July '"2020-08-32"', # a invalid date string with 32 days in August - pytest.param( - '"2020-09-31"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # a invalid date string with 31 days in September + pytest.param('"2020-09-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in September '"2020-10-32"', # a invalid date string with 32 days in October - pytest.param( - '"2020-11-31"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # a invalid date string with 31 days in November + pytest.param('"2020-11-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in November '"2020-12-32"', # a invalid date string 
with 32 days in December '"2020-13-01"', # a invalid date string with invalid month '"06/19/1963"', # an invalid date string @@ -81,13 +62,8 @@ def test_good(self, target_str): '"1998-1-20"', # non-padded month dates are not valid '"1998-01-1"', # non-padded day dates are not valid '"1998-13-01"', # invalid month - pytest.param( - '"1998-04-31"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # invalid month-day combination - pytest.param( - '"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard") - ), # 2021 is not a leap year + pytest.param('"1998-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # invalid month-day combination + pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard")), # 2021 is not a leap year '"1963-06-1\\u09ea"', # invalid non-ASCII '৪' (a Bengali 4) '"20230328"', # ISO8601 / non-RFC3339: YYYYMMDD without dashes (2023-03-28) '"2023-W01"', # ISO8601 / non-RFC3339: week number implicit day of week (2023-01-02) @@ -161,7 +137,6 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) - @pytest.mark.xfail(reason="idn-hostname format not implemented") class TestIdnHostname: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"idn-hostname"}' @@ -325,7 +300,6 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) - @pytest.mark.xfail(reason="iri-reference format is not yet implemented") class TestIriReference: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"iri-reference"}' @@ -515,40 +489,20 @@ def test_good(self, target_str): '"008:030:006Z"', # invalid time string with extra leading zeros '"8:3:6Z"', # invalid time string with no leading zero for single digit '"8:0030:6Z"', # hour, minute, second must be two digits - pytest.param( - 
'"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, Zulu (wrong hour) - pytest.param( - '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, Zulu (wrong minute) - pytest.param( - '"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, zero time-offset (wrong hour) - pytest.param( - '"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, zero time-offset (wrong minute) - pytest.param( - '"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, positive time-offset (wrong hour) - pytest.param( - '"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, positive time-offset (wrong minute) - pytest.param( - '"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, negative time-offset (wrong hour) - pytest.param( - '"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, negative time-offset (wrong minute) + pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong hour) + pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong minute) + pytest.param('"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong hour) + pytest.param('"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong minute) + pytest.param('"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong hour) + pytest.param('"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap 
second, positive time-offset (wrong minute) + pytest.param('"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong hour) + pytest.param('"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong minute) '"08:30:06-8:000"', # hour, minute in time-offset must be two digits '"24:00:00Z"', # an invalid time string with invalid hour '"00:60:00Z"', # an invalid time string with invalid minute '"00:00:61Z"', # an invalid time string with invalid second - pytest.param( - '"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # an invalid time string with invalid leap second (wrong hour) - pytest.param( - '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # an invalid time string with invalid leap second (wrong minute) + pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong hour) + pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong minute) '"01:02:03+24:00"', # an invalid time string with invalid time numoffset hour '"01:02:03+00:60"', # an invalid time string with invalid time numoffset minute '"01:02:03Z+00:30"', # an invalid time string with invalid time with both Z and numoffset @@ -584,23 +538,11 @@ class TestIpv6: '"::42:ff:1"', # leading colons is valid '"d6::"', # trailing colons is valid '"1:d6::42"', # single set of double colons in the middle is valid - pytest.param( - '"1::d6:192.168.0.1"', - marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), - ), # mixed format with the ipv4 section as decimal octets - pytest.param( - '"1:2::192.168.0.1"', - marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), - ), # mixed format with double colons between the sections - 
pytest.param( - '"::ffff:192.168.0.1"', - marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), - ), # mixed format with leading double colons (ipv4-mapped ipv6 address) + pytest.param('"1::d6:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with the ipv4 section as decimal octets + pytest.param('"1:2::192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with double colons between the sections + pytest.param('"::ffff:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with leading double colons (ipv4-mapped ipv6 address) '"1:2:3:4:5:6:7:8"', # 8 octets - pytest.param( - '"1000:1000:1000:1000:1000:1000:255.255.255.255"', - marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), - ), # a long valid ipv6 + pytest.param('"1000:1000:1000:1000:1000:1000:255.255.255.255"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # a long valid ipv6 ], ) def test_good(self, target_str): @@ -768,22 +710,11 @@ class TestEmail: '"te~st@example.com"', # tilde in local part is valid '"~test@example.com"', # tilde before local part is valid '"test~@example.com"', # tilde after local part is valid - pytest.param( - '"\\"joe bloggs\\"@example.com"', - marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), - ), # a quoted string with a space in the local part is valid - pytest.param( - '"\\"joe..bloggs\\"@example.com"', - marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), - ), # a quoted string with a double dot in the local part is valid - pytest.param( - '"\\"joe@bloggs\\"@example.com"', - marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), - ), # a quoted string with a @ in the local part is valid + pytest.param('"\\"joe bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet 
implemented in local part")), # a quoted string with a space in the local part is valid + pytest.param('"\\"joe..bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a double dot in the local part is valid + pytest.param('"\\"joe@bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a @ in the local part is valid '"joe.bloggs@[127.0.0.1]"', # an IPv4-address-literal after the @ is valid - pytest.param( - '"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard") - ), # an IPv6-address-literal after the @ is valid + pytest.param('"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard")), # an IPv6-address-literal after the @ is valid '"te.s.t@example.com"', # two separated dots inside local part are valid '"riedgar+guidance@example.com"', # plus sign in local part is valid ], @@ -929,16 +860,9 @@ def test_good(self, target_str): "bad_str", [ '"1998-12-31T23:59:61Z"', # an invalid date-time past leap second, UTC - pytest.param( - '"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # an invalid date-time with leap second on a wrong minute, UTC - pytest.param( - '"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # an invalid date-time with leap second on a wrong hour, UTC - pytest.param( - '"1990-02-31T15:59:59.123-08:00"', - marks=pytest.mark.xfail(reason="valid days not yet tied to month"), - ), # an invalid day in date-time string + pytest.param('"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong minute, UTC + pytest.param('"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong hour, UTC + pytest.param('"1990-02-31T15:59:59.123-08:00"', 
marks=pytest.mark.xfail(reason="valid days not yet tied to month")), # an invalid day in date-time string '"1990-12-31T15:59:59-24:00"', # an invalid offset in date-time string '"1963-06-19T08:30:06.28123+01:00Z"', # an invalid closing Z after time-zone offset '"06/19/1963 08:30:06 PST"', # an invalid date-time string @@ -953,7 +877,6 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) - @pytest.mark.xfail(reason="regex format not implemented") class TestRegex: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"regex"}' diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py index 5498d718c..d75c41d4b 100644 --- a/tests/unit/library/json/utils.py +++ b/tests/unit/library/json/utils.py @@ -1,7 +1,6 @@ import json from functools import partial -from json import dumps as json_dumps -from json import loads as json_loads +from json import loads as json_loads, dumps as json_dumps from typing import Any, Optional, Union from jsonschema import validate @@ -9,15 +8,18 @@ from guidance import json as gen_json from guidance.library._json import JSONSchema -from ....utils import check_match_failure as _check_match_failure -from ....utils import check_run_with_temperature -from ....utils import generate_and_check as _generate_and_check +from ....utils import check_match_failure as _check_match_failure, check_run_with_temperature, generate_and_check as _generate_and_check + +from jsonschema import validate + + +import json +from functools import partial +from json import dumps as json_dumps, loads as json_loads def generate_and_check( - target_obj: Any, - schema_obj: Union[str, JSONSchema], - desired_temperature: Optional[float] = None, + target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None ): if isinstance(schema_obj, str): schema_obj = json_loads(schema_obj) @@ -30,7 +32,9 @@ def generate_and_check( # 
Now test that the grammar can recognize and generate prepared_json # We partial in the grammar_callable if desired_temperature is not None: - grammar_callable = partial(gen_json, schema=schema_obj, temperature=desired_temperature) + grammar_callable = partial( + gen_json, schema=schema_obj, temperature=desired_temperature + ) else: grammar_callable = partial(gen_json, schema=schema_obj) @@ -57,4 +61,4 @@ def check_match_failure( failure_byte=failure_byte, allowed_bytes=allowed_bytes, grammar=grammar, - ) + ) \ No newline at end of file From cb4845bba3211bf3346231bd7cdfc312ca024d6e Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 4 Nov 2024 11:04:00 -0800 Subject: [PATCH 59/70] raise UnsatisfiableSchemaError in const/enum --- guidance/library/_json.py | 15 ++++++++++----- tests/unit/library/json/test_json.py | 9 ++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index ac1d24470..dc2320bce 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1022,10 +1022,13 @@ def const( schema_to_validate_against["enum"] = enum if schema_to_validate_against: # Raise a validation error if the value doesn't match the type - jsonschema.validate( - instance=value, - schema=schema_to_validate_against, - ) + try: + jsonschema.validate( + instance=value, + schema=schema_to_validate_against, + ) + except jsonschema.ValidationError as e: + raise UnsatisfiableSchemaError(f"const {value!r} does not match schema {schema_to_validate_against}") from e # Base case if isinstance(value, (type(None), bool, int, float, str)): return lm + json_dumps(value) @@ -1063,11 +1066,13 @@ def enum( options: Sequence[Union[None, bool, int, float, str, Mapping, Sequence]], instance_type: Optional[Union[str, Sequence[str]]] = None, ): + if not options: + raise UnsatisfiableSchemaError("enum has no options") all_opts: list[GrammarFunction] = [] for instance in options: try: grm = self.const(value=instance, 
instance_type=instance_type) - except jsonschema.ValidationError: + except UnsatisfiableSchemaError: continue all_opts.append(grm) if not all_opts: diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index b7a663a91..3596d81e3 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1590,8 +1590,9 @@ def test_invalid_typed_const(self): "const": 1, "type": "boolean" } - with pytest.raises(ValidationError): + with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) + assert ve.value.args[0] == "const 1 does not match schema {'type': 'boolean'}" def test_valid_enum_const(self): schema_obj = { @@ -1607,8 +1608,9 @@ def test_invalid_enum_const(self): "const": 1, "enum": [2, 3] } - with pytest.raises(ValidationError): + with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) + assert ve.value.args[0] == "const 1 does not match schema {'enum': [2, 3]}" def test_valid_typed_enum_const(self): schema_obj = { @@ -1634,8 +1636,9 @@ def test_invalid_typed_enum_const(self, const): "enum": [1, "2", 3], "type": "integer" } - with pytest.raises(ValidationError): + with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) + assert ve.value.args[0] == f"const {const!r} does not match schema {{'type': 'integer', 'enum': [1, '2', 3]}}" class TestAdditionalProperties: From 5489f17df0064a7a18b28020e6fdef0be0c8fc80 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 10:35:11 -0800 Subject: [PATCH 60/70] black and isort --- tests/unit/library/json/test_allOf.py | 59 ++-- tests/unit/library/json/test_json.py | 319 +++++++++--------- tests/unit/library/json/test_refs.py | 17 +- tests/unit/library/json/test_string_format.py | 137 ++++++-- tests/unit/library/json/utils.py | 24 +- 5 files changed, 324 insertions(+), 232 deletions(-) diff --git a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py index 670878c71..261f40345 100644 --- 
a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -6,6 +6,7 @@ from jsonschema import ValidationError, validate from guidance import json as gen_json + from .utils import check_match_failure, generate_and_check @@ -138,7 +139,6 @@ def test_allOf_simple_maximum(self, test_object, valid): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - @pytest.mark.parametrize( ["test_object", "valid"], [ @@ -357,15 +357,18 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): ({"foo": 0, "bar": 5, "baz": 4}, False), # invalid: baz is not an integer or null ({"foo": 0, "bar": 5, "baz": "quxx"}, False), - ] + ], ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "properties": {"foo": {"type": ["integer", "null"], "maximum": 4}, "bar": {"minimum": 5, "maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"], "minimum": 5} + "properties": { + "foo": {"type": ["integer", "null"], "maximum": 4}, + "bar": {"minimum": 5, "maximum": 5}, + }, + "additionalProperties": {"type": ["integer", "null"], "minimum": 5}, }, # additionalProperties in parent schema { @@ -373,16 +376,22 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}} ], "properties": {"bar": {"maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"]} + "additionalProperties": {"type": ["integer", "null"]}, }, # additionalProperties in allOf { "allOf": [ - {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}}, - {"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}} + { + "properties": {"foo": {"maximum": 4}}, + "additionalProperties": {"minimum": 5}, + }, + { + "properties": {"bar": {"maximum": 5}}, + "additionalProperties": {"type": ["integer", "null"]}, + }, ] }, - ] + ], ) def 
test_additionalProperties_in_allOf(self, schema, test_object, valid): if valid: @@ -396,19 +405,19 @@ def test_additionalProperties_in_allOf(self, schema, test_object, valid): @pytest.mark.parametrize( "test_object, valid", [ - ({}, True), # empty object is valid - ({"foo": 1}, False), # foo is not a string - ({"foo": "x"}, False), # foo is not an integer - ({"foo": True}, False), # foo is not a string or an integer - ] + ({}, True), # empty object is valid + ({"foo": 1}, False), # foo is not a string + ({"foo": "x"}, False), # foo is not an integer + ({"foo": True}, False), # foo is not a string or an integer + ], ) def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): schema = { "type": "object", "allOf": [ {"additionalProperties": {"type": "integer"}}, - {"additionalProperties": {"type": "string"}} - ] + {"additionalProperties": {"type": "string"}}, + ], } if valid: validate(instance=test_object, schema=schema) @@ -418,7 +427,6 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - @pytest.mark.parametrize( "test_object, valid", [ @@ -440,15 +448,18 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): ([0, 5, 4], False), # invalid: baz is not an integer or null ([0, 5, "quxx"], False), - ] + ], ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "prefixItems": [{"type": ["integer", "null"], "maximum": 4}, {"minimum": 5, "maximum": 5}], - "items": {"type": ["integer", "null"], "minimum": 5} + "prefixItems": [ + {"type": ["integer", "null"], "maximum": 4}, + {"minimum": 5, "maximum": 5}, + ], + "items": {"type": ["integer", "null"], "minimum": 5}, }, # items in parent schema { @@ -456,17 +467,19 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): {"prefixItems": [{"maximum": 4}], "items": {"minimum": 
5}}, ], "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], - "items": {"type": ["integer", "null"]} - + "items": {"type": ["integer", "null"]}, }, # items in allOf { "allOf": [ {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, - {"prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], "items": {"type": ["integer", "null"]}} + { + "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], + "items": {"type": ["integer", "null"]}, + }, ] }, - ] + ], ) def test_items_and_prefixitems_in_allOf(self, schema, test_object, valid): if valid: diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 3596d81e3..7c9cfd38a 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1,22 +1,22 @@ import json +import re +from json import dumps as json_dumps import pytest -import re from jsonschema import ValidationError, validate -from json import dumps as json_dumps from guidance import json as gen_json from guidance import models - from guidance.library._json import IGNORED_KEYS -from .utils import check_match_failure, generate_and_check +from .utils import check_match_failure, generate_and_check # Common sets of allowed_bytes INTEGER_LEADING = {b"-", b"0", *{bytes([i]) for i in range(ord("1"), ord("9") + 1)}} INTEGER_FOLLOWING = {bytes([i]) for i in range(ord("0"), ord("9") + 1)} A_to_Z = {bytes([i]) for i in range(ord("A"), ord("Z") + 1)} + def test_null(): schema = """{"type": "null" }""" @@ -83,16 +83,17 @@ def test_bad_integer(self, bad_string, good_bytes, failure_byte, allowed_bytes): {"type": "integer", "minimum": 5, "maximum": 4}, {"type": "integer", "minimum": 5, "exclusiveMaximum": 5}, {"type": "integer", "exclusiveMinimum": 5, "maximum": 5}, - ] + ], ) def test_unsatisfiable_min_max(self, schema): with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert re.fullmatch( r"(exclusiveMinimum|minimum) \(5\) is (greater than|equal to) 
(exclusiveMaximum|maximum) \((4|5)\)", - ve.value.args[0] + ve.value.args[0], ) + class TestNumber: schema = """{"type": "number" }""" @@ -152,16 +153,17 @@ def test_bad_number(self, bad_string, good_bytes, failure_byte, allowed_bytes): {"type": "integer", "minimum": 5, "maximum": 4}, {"type": "integer", "minimum": 5, "exclusiveMaximum": 5}, {"type": "integer", "exclusiveMinimum": 5, "maximum": 5}, - ] + ], ) def test_unsatisfiable_min_max(self, schema): with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert re.fullmatch( r"(exclusiveMinimum|minimum) \(5\) is (greater than|equal to) (exclusiveMaximum|maximum) \((4|5)\)", - ve.value.args[0] + ve.value.args[0], ) + class TestBoundedNumeric: @pytest.mark.parametrize( "instance, schema, should_pass", @@ -171,11 +173,15 @@ class TestBoundedNumeric: (-5, {"type": "integer", "minimum": -5}, True), pytest.param( *(5.0, {"type": "integer", "minimum": 5}, True), - marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") + marks=pytest.mark.xfail( + reason="JSON technically allows trailing zeroes, but we currently don't" + ), ), pytest.param( *(-5.0, {"type": "integer", "minimum": -5}, True), - marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") + marks=pytest.mark.xfail( + reason="JSON technically allows trailing zeroes, but we currently don't" + ), ), (5.1, {"type": "integer", "minimum": 5}, False), (-5.1, {"type": "integer", "minimum": -5}, False), @@ -235,7 +241,11 @@ class TestBoundedNumeric: (5.1, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-9.9, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), (5.0, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, False), - (-10.0, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, False), + ( + -10.0, + {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": 
-5.0}, + False, + ), (9.9, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-5.1, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), # --- Edge cases --- @@ -276,10 +286,10 @@ class TestBoundedNumeric: (0.2999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, True), (-0.2999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, True), (0.0999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), - (-0.0999, {"type": "number", "minimum": -.3, "maximum": -0.1}, False), + (-0.0999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), (0.3001, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), (-0.3001, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), - ] + ], ) def test_numeric_validation(self, instance, schema, should_pass): # Sanity check @@ -289,10 +299,7 @@ def test_numeric_validation(self, instance, schema, should_pass): else: with pytest.raises(ValidationError): validate(instance, schema=schema) - check_match_failure( - bad_string=json_dumps(instance), - schema_obj=schema - ) + check_match_failure(bad_string=json_dumps(instance), schema_obj=schema) class TestString: @@ -373,9 +380,7 @@ def test_regex_bad(self, bad_string: str, good_bytes, failure_byte, allowed_byte schema_obj=schema_obj, ) - @pytest.mark.parametrize( - "string", ["aA\u001f", '"""'] - ) + @pytest.mark.parametrize("string", ["aA\u001f", '"""']) def test_regex_properly_escaped_good(self, string): schema_obj = {"type": "string", "pattern": r".{3}"} # First sanity check what we're setting up @@ -388,13 +393,15 @@ def test_regex_properly_escaped_good(self, string): [ ( '"\\u001f\\u001f\u001f', - b'"\\u001f\\u001f', # able to match the first two stringified bytes - '\u001f'.encode(), # fails on a literal \x1f byte - None # hard to write a set of allowed bytes here + b'"\\u001f\\u001f', # able to match the first two stringified bytes + "\u001f".encode(), # fails on a literal \x1f byte + None, # hard to 
write a set of allowed bytes here ), ], ) - def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_byte, allowed_bytes): + def test_regex_properly_escaped_bad( + self, bad_string: str, good_bytes, failure_byte, allowed_bytes + ): # Note that the strings being fed in include the double quotes required # to make them JSON strings schema_obj = {"type": "string", "pattern": r".{3}"} @@ -406,7 +413,6 @@ def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_b schema_obj=schema_obj, ) - @pytest.mark.parametrize( "my_string", ["a", "bb", "ccc", "150", ",?", ".\t\n", "(){", "aA7", "\\9O"] ) @@ -729,7 +735,9 @@ def test_unsatisfiable_properties_raises(self): with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ve.value.args[0] == "Required property 'b' is unsatisfiable" - assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + assert ( + ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + ) def test_unsatisfiable_additional_properties_raises(self): schema = { @@ -740,8 +748,14 @@ def test_unsatisfiable_additional_properties_raises(self): } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "Required properties not in properties but additionalProperties is unsatisfiable. Missing required properties: ['b']" - assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + assert ( + ve.value.args[0] + == "Required properties not in properties but additionalProperties is unsatisfiable. 
Missing required properties: ['b']" + ) + assert ( + ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + ) + class TestObjectWithMissingRequired: def test_required_is_required(self): @@ -749,32 +763,40 @@ def test_required_is_required(self): generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": "xyz"}, schema) check_match_failure( - bad_string=json_dumps( - {"a": 1} - ), + bad_string=json_dumps({"a": 1}), schema_obj=schema, ) def test_validated_against_additionalProperties(self): - schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b"], "additionalProperties": {"type": "integer"}} + schema = { + "type": "object", + "properties": {"a": {"type": "integer"}}, + "required": ["b"], + "additionalProperties": {"type": "integer"}, + } generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": 42}, schema) check_match_failure( - bad_string=json_dumps( - {"a": 1, "b": "string"} - ), + bad_string=json_dumps({"a": 1, "b": "string"}), schema_obj=schema, ) def test_false_additionalProperties_fails(self): - schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b", "c"], "additionalProperties": False} + schema = { + "type": "object", + "properties": {"a": {"type": "integer"}}, + "required": ["b", "c"], + "additionalProperties": False, + } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ( ve.value.args[0] == "Required properties not in properties but additionalProperties is unsatisfiable. 
Missing required properties: ['b', 'c']" ) - assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + assert ( + ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + ) class TestSimpleArray: @@ -840,7 +862,6 @@ def test_object_list(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( ["bad_string", "good_bytes", "failure_byte", "allowed_bytes"], [ @@ -866,17 +887,14 @@ def test_bad_object(self, bad_string, good_bytes, failure_byte, allowed_bytes): ) def test_unsatisfiable_prefixItem_ok(self): - schema = { - "type": "array", - "prefixItems": [{"type": "integer"}, False] - } + schema = {"type": "array", "prefixItems": [{"type": "integer"}, False]} generate_and_check([42], schema) check_match_failure( bad_string="[42, 43]", good_bytes=b"[42", failure_byte=b",", allowed_bytes={b"]"} | INTEGER_FOLLOWING, - schema_obj=schema + schema_obj=schema, ) def test_unsatisfiable_prefixItem_raises(self): @@ -888,13 +906,15 @@ def test_unsatisfiable_prefixItem_raises(self): with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ve.value.args[0] == "prefixItems[1] is unsatisfiable but min_items is 2" - assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + assert ( + ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + ) def test_unsatisfiable_items_ok(self): schema = { "type": "array", "prefixItems": [{"type": "integer"}], - "items": {"allOf": [{"type": "integer"}, False]} + "items": {"allOf": [{"type": "integer"}, False]}, } generate_and_check([42], schema) check_match_failure( @@ -902,7 +922,7 @@ def test_unsatisfiable_items_ok(self): good_bytes=b"[42", failure_byte=b",", allowed_bytes={b"]"} | INTEGER_FOLLOWING, - schema_obj=schema + schema_obj=schema, ) def test_unsatisfiable_items_raises(self): @@ -914,9 
+934,13 @@ def test_unsatisfiable_items_raises(self): } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "prefixItems has too few elements (1) to satisfy minItems (2) but item schema is unsatisfiable" + assert ( + ve.value.args[0] + == "prefixItems has too few elements (1) to satisfy minItems (2) but item schema is unsatisfiable" + ) assert ve.value.__cause__.args[0] == "allOf contains a 'false' schema" + class TestArrayWithLengthConstraints: prefix_schema_obj = [{"type": "integer"}, {"type": "boolean"}] items_schema_obj = {"type": "string"} @@ -1001,7 +1025,6 @@ def test_good_with_items(self, min_items, max_items, target_obj): } generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1082,7 +1105,6 @@ def test_bad_with_prefix_and_items( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1147,7 +1169,6 @@ def test_bad_with_prefix( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1294,9 +1315,7 @@ def test_anyOf_objects(self, target_obj, temperature): generate_and_check(target_obj, schema_obj, desired_temperature=temperature) def test_anyOf_unsatisfiable_ok(self): - schema = { - "anyOf": [{"type": "integer"}, False] - } + schema = {"anyOf": [{"type": "integer"}, False]} generate_and_check(3, schema) def test_anyOf_unsatisfiable_raises(self): @@ -1305,7 +1324,11 @@ def test_anyOf_unsatisfiable_raises(self): } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == 'all anyOf schemas are unsatisfiable: [{"type": "integer", "minimum": 10, "maximum": 0}, false]' + assert ( + ve.value.args[0] + == 'all anyOf schemas are unsatisfiable: [{"type": "integer", "minimum": 10, "maximum": 0}, false]' + ) + class TestAllOf: 
@pytest.mark.parametrize( @@ -1364,13 +1387,12 @@ def test_allOf_ref(self): generate_and_check(target_obj, schema_obj) def test_allOf_bad_schema(self): - schema = { - "allOf" : [{ "type": "integer" }, { "type": "string" }] - } + schema = {"allOf": [{"type": "integer"}, {"type": "string"}]} with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ve.value.args[0] == "allOf has conflicting types: [{'integer'}, {'string'}]" + class TestOneOf: @pytest.mark.parametrize("target_obj", [123, 42]) def test_oneOf_simple(self, target_obj): @@ -1385,7 +1407,6 @@ def test_oneOf_simple(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize("target_obj", [123, True]) def test_oneOf_compound(self, target_obj): schema = """{ @@ -1423,7 +1444,6 @@ def test_enum(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1443,7 +1463,6 @@ def test_bad_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1471,13 +1490,10 @@ def test_bad_prefix_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes) ("2", False), ("1", False), (True, False), - ] + ], ) def test_typed_enum_single_type(self, obj, valid): - schema_obj = { - "enum": [1, "2", True], - "type": "integer" - } + schema_obj = {"enum": [1, "2", True], "type": "integer"} if valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1494,13 +1510,10 @@ def test_typed_enum_single_type(self, obj, valid): ("2", True), ("1", False), (True, False), - ] + ], ) def test_typed_enum_multiple_types(self, obj, valid): - schema_obj = { - "enum": [1, "2", True], - "type": ["integer", "string"] - } + schema_obj = {"enum": [1, "2", True], "type": ["integer", "string"]} if 
valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1510,14 +1523,12 @@ def test_typed_enum_multiple_types(self, obj, valid): check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj) def test_invalid_typed_enum(self): - schema_obj = { - "enum": [1, "2"], - "type": "boolean" - } + schema_obj = {"enum": [1, "2"], "type": "boolean"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "No valid options found for enum with type 'boolean': [1, '2']" + class TestConst: def test_constant_int(self): # First sanity check what we're setting up @@ -1577,47 +1588,31 @@ def test_constant_precedence(self): ) def test_valid_typed_const(self): - schema_obj = { - "const": 1, - "type": "integer" - } + schema_obj = {"const": 1, "type": "integer"} target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_typed_const(self): - schema_obj = { - "const": 1, - "type": "boolean" - } + schema_obj = {"const": 1, "type": "boolean"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "const 1 does not match schema {'type': 'boolean'}" def test_valid_enum_const(self): - schema_obj = { - "const": 1, - "enum": [1, 2, 3] - } + schema_obj = {"const": 1, "enum": [1, 2, 3]} target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_enum_const(self): - schema_obj = { - "const": 1, - "enum": [2, 3] - } + schema_obj = {"const": 1, "enum": [2, 3]} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "const 1 does not match schema {'enum': [2, 3]}" def test_valid_typed_enum_const(self): - schema_obj = { - "const": 1, - "enum": [1, "2", 3], - "type": "integer" - } + schema_obj = {"const": 1, "enum": [1, "2", 3], "type": "integer"} target_obj = 1 validate(instance=target_obj, schema=schema_obj) 
generate_and_check(target_obj, schema_obj) @@ -1625,20 +1620,19 @@ def test_valid_typed_enum_const(self): @pytest.mark.parametrize( "const", [ - "2", # right enum, wrong type - 2, # wrong enum, right type - "3", # wrong enum, wrong type - ] + "2", # right enum, wrong type + 2, # wrong enum, right type + "3", # wrong enum, wrong type + ], ) def test_invalid_typed_enum_const(self, const): - schema_obj = { - "const": const, - "enum": [1, "2", 3], - "type": "integer" - } + schema_obj = {"const": const, "enum": [1, "2", 3], "type": "integer"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) - assert ve.value.args[0] == f"const {const!r} does not match schema {{'type': 'integer', 'enum': [1, '2', 3]}}" + assert ( + ve.value.args[0] + == f"const {const!r} does not match schema {{'type': 'integer', 'enum': [1, '2', 3]}}" + ) class TestAdditionalProperties: @@ -1684,11 +1678,15 @@ def test_simple_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ - ({"a": "1"}, b'{"a": ', b'"', INTEGER_LEADING, ), + ( + {"a": "1"}, + b'{"a": ', + b'"', + INTEGER_LEADING, + ), ( {"a": 1, "b": 1.5}, b'{"a": 1, "b": 1', @@ -1708,9 +1706,7 @@ def test_simple_bad_type(self, bad_obj, good_bytes, failure_byte, allowed_bytes) schema_obj=schema_obj, ) - @pytest.mark.parametrize( - "target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}] - ) + @pytest.mark.parametrize("target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}]) def test_anyOf_additional_properties(self, target_obj): # First sanity check what we're setting up schema_obj = json.loads(self.anyOf_schema) @@ -1719,7 +1715,6 @@ def test_anyOf_additional_properties(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1761,7 
+1756,6 @@ def test_properties_and_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1770,9 +1764,7 @@ def test_properties_and_additional_properties(self, target_obj, temperature): ({"a": 1, "b": 2}, b'{"', b"a", {b"m"}), ], ) - def test_combined_missing_properties( - self, bad_obj, good_bytes, failure_byte, allowed_bytes - ): + def test_combined_missing_properties(self, bad_obj, good_bytes, failure_byte, allowed_bytes): schema_obj = json.loads(self.combined_schema) bad_string = json_dumps(bad_obj) check_match_failure( @@ -1783,7 +1775,6 @@ def test_combined_missing_properties( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1912,7 +1903,6 @@ def test_empty_schema(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_string, good_bytes, failure_byte, allowed_bytes", [ @@ -1941,9 +1931,7 @@ def test_empty_schema(self, target_obj, temperature): ), ], ) - def test_bad_empty_schema( - self, bad_string, good_bytes, failure_byte, allowed_bytes - ): + def test_bad_empty_schema(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj = json.loads(self.empty_schema) check_match_failure( bad_string=bad_string, @@ -1959,7 +1947,12 @@ def test_bad_empty_schema( # Empty property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, + { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + "$defs": {"A": {}}, + "required": ["a"], + }, ], ) @pytest.mark.parametrize( @@ -1990,10 +1983,14 @@ def test_nested_empty_schema(self, schema_obj, target_obj, temperature): # Empty 
property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, + { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + "$defs": {"A": {}}, + "required": ["a"], + }, ], ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -2036,7 +2033,6 @@ def test_nested_empty_schema_with_props(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -2071,7 +2067,6 @@ def test_items(self, schema_obj): [1, 0.4, "hello", False, None, {"a": 42}, [1, 2, 3, "four"]], schema_obj ) - def test_no_items(self): schema_obj = {"type": "array", "items": False} check_match_failure( @@ -2104,7 +2099,6 @@ def test_additionalProperties(self, schema_obj): schema_obj, ) - def test_no_additionalProperties(self): schema_obj = {"type": "object", "additionalProperties": False} check_match_failure( @@ -2115,17 +2109,17 @@ def test_no_additionalProperties(self): schema_obj=schema_obj, ) + def test_ignored_keys_allowed_as_properties(): schema_obj = { "type": "object", - "properties": { - key: {"type": "string"} for key in IGNORED_KEYS - }, + "properties": {key: {"type": "string"} for key in IGNORED_KEYS}, "required": list(IGNORED_KEYS), } target_obj = {key: "value" for key in IGNORED_KEYS} generate_and_check(target_obj, schema_obj) + class TestRequiredProperties: schema_obj = { "type": "object", @@ -2134,10 +2128,19 @@ class TestRequiredProperties: "b": {"type": "number"}, "c": {"type": "boolean"}, }, - "additionalProperties": True + "additionalProperties": True, } ALL_REQUIRED = ["a", "b", "c"] - SOME_REQUIRED_SUBSETS = [[], ["a"], ["b"], ["c"], ["a", "b"], ["a", "c"], ["b", "c"], ["a", "b", "c"]] + SOME_REQUIRED_SUBSETS = [ + [], + ["a"], + ["b"], + ["c"], + ["a", "b"], + 
["a", "c"], + ["b", "c"], + ["a", "b", "c"], + ] NONE_REQUIRED: list[str] = [] @pytest.mark.parametrize( @@ -2146,7 +2149,7 @@ class TestRequiredProperties: {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) def test_all_required_good(self, extra_items): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2166,7 +2169,7 @@ def test_all_required_good(self, extra_items): ({"c": True}), # Missing all ({}), - ] + ], ) def test_all_required_bad(self, bad_obj): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2181,7 +2184,7 @@ def test_all_required_bad(self, bad_obj): {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) @pytest.mark.parametrize( "required", @@ -2219,7 +2222,7 @@ def test_some_required_bad(self, required): {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) @pytest.mark.parametrize( "target_obj", @@ -2232,55 +2235,48 @@ def test_some_required_bad(self, required): {"a": "hello", "c": True}, {"b": 42, "c": True}, {"a": "hello", "b": 42, "c": True}, - ] + ], ) def test_none_required(self, target_obj, extra_items): schema_obj = {**self.schema_obj, "required": self.NONE_REQUIRED} generate_and_check({**target_obj, **extra_items}, schema_obj) + class TestRequiredPropertiesScaling: - @pytest.mark.parametrize( - "num_properties", - [1, 2, 3, 4, 5, 10, 20, 50, 100] - ) + @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) def test_many_optional_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": { - f"prop_{i}": {"type": "string"} for i in range(num_properties) - }, - "required": [] # Empty should be worst-case scenario + "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, + "required": [], # Empty should be worst-case scenario } from guidance.library._json import GenJson + genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() cache_info = genjson._join.__wrapped__.cache_info() # 
Theoretical number of cache misses under the current implementation - expected_misses = 2*num_properties - 1 - MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? + expected_misses = 2 * num_properties - 1 + MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? assert 0 < cache_info.misses <= expected_misses + MISSES_MAGIC_NUMBER # NOTE: that if the cache maxsize is hit, the number of misses will be more than expected # Theoretical number of total calls under the current implementation - expected_calls = num_properties*(num_properties - 1) // 2 - CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? + expected_calls = num_properties * (num_properties - 1) // 2 + CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? assert 0 < cache_info.hits + cache_info.misses <= expected_calls + CALLS_MAGIC_NUMBER - @pytest.mark.parametrize( - "num_properties", - [1, 2, 3, 4, 5, 10, 20, 50, 100] - ) + @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) def test_all_required_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": { - f"prop_{i}": {"type": "string"} for i in range(num_properties) - }, - "required": [f"prop_{i}" for i in range(num_properties)] + "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, + "required": [f"prop_{i}" for i in range(num_properties)], } from guidance.library._json import GenJson + genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() @@ -2308,7 +2304,7 @@ class TestBooleanSchema: {"a": [1, 2, 3]}, {"a": {"b": 1}}, False, - True + True, ], ) def test_true_schema(self, target_obj): @@ -2327,7 +2323,10 @@ def test_false_required_property(self): with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "Required property 'a' is unsatisfiable" - assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of 
`false`" + assert ( + ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + ) + class TestWhitespace: seps = [ @@ -2346,7 +2345,7 @@ class TestWhitespace: ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ] + ], ) @pytest.mark.parametrize( "separators", @@ -2372,7 +2371,7 @@ def test_separators(self, separators, schema, obj): ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ] + ], ) @pytest.mark.parametrize( "separators", diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py index fd1136058..f2248129d 100644 --- a/tests/unit/library/json/test_refs.py +++ b/tests/unit/library/json/test_refs.py @@ -1,10 +1,11 @@ +from json import dumps as json_dumps + import pytest from jsonschema import ValidationError, validate -from json import dumps as json_dumps - from .utils import check_match_failure, generate_and_check + class TestRefs: @pytest.mark.parametrize( ["test_object", "valid"], @@ -438,9 +439,15 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( # invalid on inner field ({"bar": "a", "foo": {"bar": 1}}, False), # invalid on outer field - ({ "bar": 1, "foo": {"bar": "a"}}, False), + ({"bar": 1, "foo": {"bar": "a"}}, False), # valid on both fields - ({"bar": "a", "foo": {"bar": "a"}, }, True), + ( + { + "bar": "a", + "foo": {"bar": "a"}, + }, + True, + ), ], ) def test_refs_with_relative_uris_and_defs(self, test_object, valid): @@ -974,4 +981,4 @@ def test_empty_tokens_in_ref_json_pointer(self, test_object, valid): else: with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) \ No newline at end of file + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) diff --git a/tests/unit/library/json/test_string_format.py b/tests/unit/library/json/test_string_format.py index 09712fb45..7b2dd9bdc 100644 --- a/tests/unit/library/json/test_string_format.py +++ b/tests/unit/library/json/test_string_format.py @@ -1,8 +1,9 @@ """Adapted from https://github.com/json-schema-org/JSON-Schema-Test-Suite/tree/9fc880bfb6d8ccd093bc82431f17d13681ffae8e/tests/draft2020-12/optional/format""" -import pytest import json +import pytest + from .utils import check_match_failure, generate_and_check @@ -44,17 +45,35 @@ def test_good(self, target_str): "bad_str", [ '"2020-01-32"', # a invalid date string with 32 days in January - pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 29 days in February (normal) - pytest.param('"2020-02-30"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 30 days in February (leap) + pytest.param( + '"2021-02-29"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 29 days in February (normal) + pytest.param( + '"2020-02-30"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 30 days in February (leap) '"2020-03-32"', # a invalid date string with 32 days in March - pytest.param('"2020-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in April + pytest.param( + '"2020-04-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in April '"2020-05-32"', # a invalid date string with 32 days in May - pytest.param('"2020-06-31"', marks=pytest.mark.xfail(reason="number of 
days not yet tied to month")), # a invalid date string with 31 days in June + pytest.param( + '"2020-06-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in June '"2020-07-32"', # a invalid date string with 32 days in July '"2020-08-32"', # a invalid date string with 32 days in August - pytest.param('"2020-09-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in September + pytest.param( + '"2020-09-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in September '"2020-10-32"', # a invalid date string with 32 days in October - pytest.param('"2020-11-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in November + pytest.param( + '"2020-11-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in November '"2020-12-32"', # a invalid date string with 32 days in December '"2020-13-01"', # a invalid date string with invalid month '"06/19/1963"', # an invalid date string @@ -62,8 +81,13 @@ def test_good(self, target_str): '"1998-1-20"', # non-padded month dates are not valid '"1998-01-1"', # non-padded day dates are not valid '"1998-13-01"', # invalid month - pytest.param('"1998-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # invalid month-day combination - pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard")), # 2021 is not a leap year + pytest.param( + '"1998-04-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # invalid month-day combination + pytest.param( + '"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard") + ), # 2021 is not a leap year '"1963-06-1\\u09ea"', # invalid non-ASCII '৪' (a Bengali 4) '"20230328"', # 
ISO8601 / non-RFC3339: YYYYMMDD without dashes (2023-03-28) '"2023-W01"', # ISO8601 / non-RFC3339: week number implicit day of week (2023-01-02) @@ -137,6 +161,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) + @pytest.mark.xfail(reason="idn-hostname format not implemented") class TestIdnHostname: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"idn-hostname"}' @@ -300,6 +325,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) + @pytest.mark.xfail(reason="iri-reference format is not yet implemented") class TestIriReference: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"iri-reference"}' @@ -489,20 +515,40 @@ def test_good(self, target_str): '"008:030:006Z"', # invalid time string with extra leading zeros '"8:3:6Z"', # invalid time string with no leading zero for single digit '"8:0030:6Z"', # hour, minute, second must be two digits - pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong hour) - pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong minute) - pytest.param('"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong hour) - pytest.param('"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong minute) - pytest.param('"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong hour) - pytest.param('"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong minute) - pytest.param('"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap 
seconds are hard")), # invalid leap second, negative time-offset (wrong hour) - pytest.param('"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong minute) + pytest.param( + '"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, Zulu (wrong hour) + pytest.param( + '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, Zulu (wrong minute) + pytest.param( + '"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, zero time-offset (wrong hour) + pytest.param( + '"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, zero time-offset (wrong minute) + pytest.param( + '"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, positive time-offset (wrong hour) + pytest.param( + '"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, positive time-offset (wrong minute) + pytest.param( + '"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, negative time-offset (wrong hour) + pytest.param( + '"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, negative time-offset (wrong minute) '"08:30:06-8:000"', # hour, minute in time-offset must be two digits '"24:00:00Z"', # an invalid time string with invalid hour '"00:60:00Z"', # an invalid time string with invalid minute '"00:00:61Z"', # an invalid time string with invalid second - pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong hour) - pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong minute) + 
pytest.param( + '"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid time string with invalid leap second (wrong hour) + pytest.param( + '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid time string with invalid leap second (wrong minute) '"01:02:03+24:00"', # an invalid time string with invalid time numoffset hour '"01:02:03+00:60"', # an invalid time string with invalid time numoffset minute '"01:02:03Z+00:30"', # an invalid time string with invalid time with both Z and numoffset @@ -538,11 +584,23 @@ class TestIpv6: '"::42:ff:1"', # leading colons is valid '"d6::"', # trailing colons is valid '"1:d6::42"', # single set of double colons in the middle is valid - pytest.param('"1::d6:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with the ipv4 section as decimal octets - pytest.param('"1:2::192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with double colons between the sections - pytest.param('"::ffff:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with leading double colons (ipv4-mapped ipv6 address) + pytest.param( + '"1::d6:192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with the ipv4 section as decimal octets + pytest.param( + '"1:2::192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with double colons between the sections + pytest.param( + '"::ffff:192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with leading double colons (ipv4-mapped ipv6 address) '"1:2:3:4:5:6:7:8"', # 8 octets - pytest.param('"1000:1000:1000:1000:1000:1000:255.255.255.255"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # a long valid ipv6 + pytest.param( + 
'"1000:1000:1000:1000:1000:1000:255.255.255.255"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # a long valid ipv6 ], ) def test_good(self, target_str): @@ -710,11 +768,22 @@ class TestEmail: '"te~st@example.com"', # tilde in local part is valid '"~test@example.com"', # tilde before local part is valid '"test~@example.com"', # tilde after local part is valid - pytest.param('"\\"joe bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a space in the local part is valid - pytest.param('"\\"joe..bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a double dot in the local part is valid - pytest.param('"\\"joe@bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a @ in the local part is valid + pytest.param( + '"\\"joe bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a space in the local part is valid + pytest.param( + '"\\"joe..bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a double dot in the local part is valid + pytest.param( + '"\\"joe@bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a @ in the local part is valid '"joe.bloggs@[127.0.0.1]"', # an IPv4-address-literal after the @ is valid - pytest.param('"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard")), # an IPv6-address-literal after the @ is valid + pytest.param( + '"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard") + ), # an IPv6-address-literal after the @ is valid '"te.s.t@example.com"', # two separated dots inside local part are 
valid '"riedgar+guidance@example.com"', # plus sign in local part is valid ], @@ -860,9 +929,16 @@ def test_good(self, target_str): "bad_str", [ '"1998-12-31T23:59:61Z"', # an invalid date-time past leap second, UTC - pytest.param('"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong minute, UTC - pytest.param('"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong hour, UTC - pytest.param('"1990-02-31T15:59:59.123-08:00"', marks=pytest.mark.xfail(reason="valid days not yet tied to month")), # an invalid day in date-time string + pytest.param( + '"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid date-time with leap second on a wrong minute, UTC + pytest.param( + '"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid date-time with leap second on a wrong hour, UTC + pytest.param( + '"1990-02-31T15:59:59.123-08:00"', + marks=pytest.mark.xfail(reason="valid days not yet tied to month"), + ), # an invalid day in date-time string '"1990-12-31T15:59:59-24:00"', # an invalid offset in date-time string '"1963-06-19T08:30:06.28123+01:00Z"', # an invalid closing Z after time-zone offset '"06/19/1963 08:30:06 PST"', # an invalid date-time string @@ -877,6 +953,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) + @pytest.mark.xfail(reason="regex format not implemented") class TestRegex: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"regex"}' diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py index d75c41d4b..5498d718c 100644 --- a/tests/unit/library/json/utils.py +++ b/tests/unit/library/json/utils.py @@ -1,6 +1,7 @@ import json from functools import partial -from json import loads as 
json_loads, dumps as json_dumps +from json import dumps as json_dumps +from json import loads as json_loads from typing import Any, Optional, Union from jsonschema import validate @@ -8,18 +9,15 @@ from guidance import json as gen_json from guidance.library._json import JSONSchema -from ....utils import check_match_failure as _check_match_failure, check_run_with_temperature, generate_and_check as _generate_and_check - -from jsonschema import validate - - -import json -from functools import partial -from json import dumps as json_dumps, loads as json_loads +from ....utils import check_match_failure as _check_match_failure +from ....utils import check_run_with_temperature +from ....utils import generate_and_check as _generate_and_check def generate_and_check( - target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None + target_obj: Any, + schema_obj: Union[str, JSONSchema], + desired_temperature: Optional[float] = None, ): if isinstance(schema_obj, str): schema_obj = json_loads(schema_obj) @@ -32,9 +30,7 @@ def generate_and_check( # Now test that the grammar can recognize and generate prepared_json # We partial in the grammar_callable if desired_temperature is not None: - grammar_callable = partial( - gen_json, schema=schema_obj, temperature=desired_temperature - ) + grammar_callable = partial(gen_json, schema=schema_obj, temperature=desired_temperature) else: grammar_callable = partial(gen_json, schema=schema_obj) @@ -61,4 +57,4 @@ def check_match_failure( failure_byte=failure_byte, allowed_bytes=allowed_bytes, grammar=grammar, - ) \ No newline at end of file + ) From 1295affed69973c3a239063f29390538fb68b3d5 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 13:48:25 -0800 Subject: [PATCH 61/70] refactor allOf contents out to reduce_schema --- guidance/library/_json.py | 25 ++-- guidance/library/_json_normalization copy.py | 94 ++++++++++++++ guidance/library/_json_normalization.py | 121 +++++++++++++++++++ 3 
files changed, 230 insertions(+), 10 deletions(-) create mode 100644 guidance/library/_json_normalization copy.py create mode 100644 guidance/library/_json_normalization.py diff --git a/guidance/library/_json.py b/guidance/library/_json.py index dc2320bce..50eea0a01 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -778,14 +778,8 @@ def oneOf( warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") return lm + self.anyOf(anyof_list=oneof_list, base_uri=base_uri) - @guidance(stateless=True) - def allOf( - self, - lm, - *, - parent_schema: JSONSchema, - base_uri: str, - ): + + def reduce_schema(self, orig_schema: dict[str, Any], base_uri: str) -> dict[str, Any]: types: list[set[str]] = [] properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) required: dict[str, None] = dict() # use a dict for ordered-set behavior @@ -909,7 +903,7 @@ def add_schema(schema: JSONSchema, base_uri: str): continue handle_keyword(key, value, schema, base_uri) - add_schema(parent_schema, base_uri) + add_schema(orig_schema, base_uri) combined_schema: dict[str, Any] = {} @@ -1002,8 +996,19 @@ def reduce_types(type_a: set[str], type_b: set[str]) -> set[str]: assert not set(combined_schema) & set(other_data) combined_schema.update(other_data) + return combined_schema - return lm + self.json(json_schema=combined_schema, base_uri=base_uri) + + @guidance(stateless=True) + def allOf( + self, + lm, + *, + parent_schema: JSONSchema, + base_uri: str, + ): + reduced_schema = self.reduce_schema(parent_schema, base_uri) + return lm + self.json(json_schema=reduced_schema, base_uri=base_uri) @guidance(stateless=True) diff --git a/guidance/library/_json_normalization copy.py b/guidance/library/_json_normalization copy.py new file mode 100644 index 000000000..12c9f29c3 --- /dev/null +++ b/guidance/library/_json_normalization copy.py @@ -0,0 +1,94 @@ +from __future__ import annotations +from typing import Any, 
Optional, TypedDict, cast, NamedTuple +from itertools import product + +from typing import TypedDict, List, Union, Any, Dict + +# Unnormalized Schema Definitions + +class BaseSchema(TypedDict, total=False): + type: str + properties: Dict[str, Any] + items: Any + required: List[str] + enum: List[Any] + const: Any + minimum: int + maximum: int + minLength: int + maxLength: int + pattern: str + # Other schema keywords can be added here + +class Schema(BaseSchema): + allOf: List[Schema] + anyOf: List[Schema] + oneOf: List[Schema] + +# Normalized Schema Definitions + +class NormalizedAllOfSchema(BaseSchema): + allOf: List[BaseSchema] + +class NormalizedAnyOfSchema(TypedDict): + anyOf: List[Union[NormalizedAllOfSchema, BaseSchema]] + +class NormalizedOneOfSchema(TypedDict): + oneOf: List[Union[NormalizedAllOfSchema, BaseSchema]] + +# The NormalizedSchema can be a NormalizedBaseSchema or top-level combinators without nesting +NormalizedSchema = Union[BaseSchema, NormalizedAllOfSchema, NormalizedAnyOfSchema, NormalizedOneOfSchema] + +class Combinators(NamedTuple): + allOf: List[Schema] + anyOf: List[Schema] + oneOf: List[Schema] + +def maybe_allOf(nodes: list[BaseSchema], siblings: Optional[BaseSchema] = None) -> NormalizedSchema: + if len(nodes) == 1 and not siblings: + return nodes[0] + if siblings: + return {"allOf": [*nodes, siblings]} + return {"allOf": nodes} + +def get_combinators_and_siblings(node: Schema) -> tuple[Combinators, BaseSchema]: + allOf = cast(list[Schema], node.pop("allOf", [])) + oneOf = cast(list[Schema], node.pop("oneOf", [])) + anyOf = cast(list[Schema], node.pop("anyOf", [])) + siblings = node + return Combinators(allOf, oneOf, anyOf), siblings + +def normalize(orig_node: Schema) -> NormalizedSchema: + ((allOf_list, oneOf_list, anyOf_list), siblings) = get_combinators_and_siblings(orig_node) + if not allOf_list and not oneOf_list and not anyOf_list: + return siblings + + allOf_list = normalize_allOf(allOf_list, siblings) + anyOf_list = 
normalize_oneOf_anyOf(anyOf_list) + + if oneOf_list and anyOf_list: + node: NormalizedOneOfSchema = { + "oneOf": [ + maybe_allOf([oneOf_item, anyOf_item, *allOf_list]) + for anyOf_item in anyOf_list + for oneOf_item in oneOf_list + ] + } + elif oneOf_list: + node: NormalizedOneOfSchema = { + "oneOf": [ + maybe_allOf([oneOf_item, *allOf_list]) + for oneOf_item in oneOf_list + ] + } + elif anyOf_list: + node: NormalizedAnyOfSchema = { + "anyOf": [ + maybe_allOf([anyOf_item, *allOf_list]) + for anyOf_item in anyOf_list + ] + } + elif allOf_list: + node: NormalizedSchema = maybe_allOf(allOf_list) + + return node \ No newline at end of file diff --git a/guidance/library/_json_normalization.py b/guidance/library/_json_normalization.py new file mode 100644 index 000000000..448f2ea8b --- /dev/null +++ b/guidance/library/_json_normalization.py @@ -0,0 +1,121 @@ +from __future__ import annotations +from typing import Any, Optional +from itertools import product + + +def normalize_allOf(subnodes: list[dict[str, Any]], siblings: dict[str, Any] = {}) -> dict[str, Any]: + if not subnodes: + return siblings + + # Normalization will ensure that there are no applicable "anyOf" or "oneOf" keys + # except at the top level of the schema + subnodes = [normalize(node) for node in subnodes] + groups = [] + if any("oneOf" in node for node in subnodes): + # Binds more tightly than anyOf + kind = "oneOf" + elif any("anyOf" in node for node in subnodes): + kind = "anyOf" + else: + # We are done + return maybe_allOf(subnodes, siblings) + + other = [] + if siblings: + other.append(siblings) + + for node in subnodes: + if "oneOf" in node and "anyOf" in node: + oneOf_list = node.pop("oneOf") + anyOf_list = node.pop("anyOf") + groups.append(list(product(oneOf_list, anyOf_list))) + + elif "oneOf" in node: + oneOf_list = node.pop("oneOf") + groups.append(oneOf_list) + + elif "anyOf" in node: + anyOf_list = node.pop("anyOf") + groups.append(anyOf_list) + + if "allOf" in node: + 
other.extend(node.pop("allOf")) + + if node: + # If there are any keys left, they need to end up in every allOf + other.append(node) + + return { + kind: [ + maybe_allOf([*item, *other]) + for item in product(*groups) + ] + } + +def maybe_allOf(nodes: list[dict[str, Any]], siblings: Optional[dict[str, Any]] = None) -> dict[str, Any]: + if len(nodes) == 1 and not siblings: + return nodes[0] + if siblings: + return {"allOf": [*nodes, siblings]} + return {"allOf": nodes} + + +def normalize(node: dict[str, Any]) -> dict[str, Any]: + node = normalize_allOf(node.pop("allOf", []), node) + oneOf_list = node.pop("oneOf", []) + anyOf_list = node.pop("anyOf", []) + allOf_list = node.pop("allOf", []) + + if oneOf_list and anyOf_list: + node = { + "oneOf": [ + maybe_allOf([oneOf_item, anyOf_item, *allOf_list], node) + for anyOf_item in anyOf_list + for oneOf_item in oneOf_list + ] + } + elif oneOf_list: + node = { + "oneOf": [ + maybe_allOf([oneOf_item, *allOf_list], node) + for oneOf_item in oneOf_list + ] + } + elif anyOf_list: + node = { + "anyOf": [ + maybe_allOf([anyOf_item, *allOf_list], node) + for anyOf_item in anyOf_list + ] + } + elif allOf_list: + node = maybe_allOf([node, *allOf_list]) + return node + +def normalize_oneOf_anyOf(node: dict[str, Any]) -> dict[str, Any]: + oneOf_list = node.pop("oneOf", []) + anyOf_list = node.pop("anyOf", []) + allOf_list = node.pop("allOf", []) + + if oneOf_list and anyOf_list: + node = { + "oneOf": [ + maybe_allOf([oneOf_item, anyOf_item, *allOf_list], node) + for anyOf_item in anyOf_list + for oneOf_item in oneOf_list + ] + } + elif oneOf_list: + node = { + "oneOf": [ + maybe_allOf([oneOf_item, *allOf_list], node) + for oneOf_item in oneOf_list + ] + } + elif anyOf_list: + node = { + "anyOf": [ + maybe_allOf([anyOf_item, *allOf_list], node) + for anyOf_item in anyOf_list + ] + } From 67fd5e5a55355bb5e7023316fff2e2837838173f Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 15:22:58 -0800 Subject: [PATCH 62/70] 
refactor sibling handling into push_sibling_keys --- guidance/library/_json.py | 137 ++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 74 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 50eea0a01..208b6dcfc 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -778,6 +778,59 @@ def oneOf( warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") return lm + self.anyOf(anyof_list=oneof_list, base_uri=base_uri) + def push_sibling_keys(self, json_schema: JSONSchema) -> JSONSchema: + """ + If sibling keys are present next to anyOf, oneOf, or $ref, we push them down into an allOf. + """ + parent_schema = json_schema.copy() + anyof_list = parent_schema.pop(Keyword.ANYOF, []) + oneof_list = parent_schema.pop(Keyword.ONEOF, []) + allof_list = parent_schema.pop(Keyword.ALLOF, []) + ref = parent_schema.pop(Keyword.REF, None) + + common = [] + if VALID_KEYS.intersection(parent_schema) - set(IGNORED_KEYS): + # If there are any sibling keys, we need to push them down into an allOf + common.append(parent_schema) + if allof_list: + common.extend(allof_list) + if ref: + # TODO: $id / base_uri? 
+ common.append({Keyword.REF: ref}) + + if anyof_list and oneof_list: + return { + "oneOf": [ + {"allOf": common + [one_item, any_item]} + for one_item in oneof_list + for any_item in anyof_list + ], + } + + if oneof_list: + if not common: + return {"oneOf": oneof_list} + return { + "oneOf": [ + {"allOf": common + [one_item]} + for one_item in oneof_list + ], + } + + if anyof_list: + if not common: + return {"anyOf": anyof_list} + return { + "anyOf": [ + {"allOf": common + [any_item]} + for any_item in anyof_list + ], + } + + if len(common) == 1: + return common[0] + + return {"allOf": common} def reduce_schema(self, orig_schema: dict[str, Any], base_uri: str) -> dict[str, Any]: types: list[set[str]] = [] @@ -1130,96 +1183,32 @@ def json( if json_schema == {}: return lm + self.any() - validate_json_node_keys(json_schema) - if Keyword.ID in json_schema: # "cd" into the new base_uri base_uri = urijoin(base_uri, json_schema[Keyword.ID]) - if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema: - parent_schema = json_schema.copy() - anyof_list = parent_schema.pop(Keyword.ANYOF) - allof_list = parent_schema.pop(Keyword.ALLOF) - # Reduce the problem to an anyOf of allOfs - return lm + self.anyOf( - anyof_list=[ - {"allOf": [any_item, *allof_list], **parent_schema} - for any_item in anyof_list - ], - base_uri=base_uri, - ) - - if Keyword.ALLOF in json_schema and Keyword.ONEOF in json_schema: - parent_schema = json_schema.copy() - allof_list = parent_schema.pop(Keyword.ALLOF) - oneof_list = parent_schema.pop(Keyword.ONEOF) - # Reduce the problem to a oneOf of allOfs - return lm + self.oneOf( - oneof_list=[ - {"allOf": [one_item, *allof_list], **parent_schema} - for one_item in oneof_list - ], - base_uri=base_uri, - ) - - if Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: - parent_schema = json_schema.copy() - anyof_list = parent_schema.pop(Keyword.ANYOF) - oneof_list = parent_schema.pop(Keyword.ONEOF) - assert Keyword.ALLOF not in parent_schema - # 
Reduce the problem to a oneOf of allOfs - return lm + self.oneOf( - oneof_list=[ - {"allOf": [one_item, any_item], **parent_schema} - for any_item in anyof_list - for one_item in oneof_list - ], - base_uri=base_uri, - ) + validate_json_node_keys(json_schema) + json_schema = self.push_sibling_keys(json_schema) if Keyword.ALLOF in json_schema: + sibling_keys = get_sibling_keys(json_schema, Keyword.ALLOF) + assert not sibling_keys return lm + self.allOf(parent_schema=json_schema, base_uri=base_uri) if Keyword.ANYOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ANYOF) - if not sibling_keys: - return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF], base_uri=base_uri) - # Let the allOf function handle anyOfs with sibling keys - parent_schema = json_schema.copy() - anyof_list = parent_schema.pop(Keyword.ANYOF) - return lm + self.anyOf( - anyof_list=[ - {"allOf": [any_item], **parent_schema} - for any_item in anyof_list - ], - base_uri=base_uri, - ) + assert not sibling_keys + return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF], base_uri=base_uri) if Keyword.ONEOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF) - if not sibling_keys: - return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF], base_uri=base_uri) - # Let the allOf function handle oneOfs with sibling keys - parent_schema = json_schema.copy() - oneof_list = parent_schema.pop(Keyword.ONEOF) - assert Keyword.ALLOF not in parent_schema - return lm + self.oneOf( - oneof_list=[ - {"allOf": [one_item], **parent_schema} - for one_item in oneof_list - ], - base_uri=base_uri, - ) + assert not sibling_keys + return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF], base_uri=base_uri) if Keyword.REF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.REF) - if not sibling_keys: - return lm + self.ref(reference=json_schema[Keyword.REF], base_uri=base_uri) - # Let the allOf function handle refs with sibling keys - parent_schema = 
json_schema.copy() - ref = parent_schema.pop(Keyword.REF) - assert Keyword.ALLOF not in parent_schema - return lm + self.allOf(parent_schema={"allOf": [{Keyword.REF: ref}], **parent_schema}, base_uri=base_uri) + assert not sibling_keys + return lm + self.ref(reference=json_schema[Keyword.REF], base_uri=base_uri) if Keyword.CONST in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM} From e4ff3aa37eccb5869069d38f3dd2d5edce93b465 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 15:23:49 -0800 Subject: [PATCH 63/70] drop unnecessary cd-ing --- guidance/library/_json.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 208b6dcfc..5dc992c67 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1183,9 +1183,6 @@ def json( if json_schema == {}: return lm + self.any() - if Keyword.ID in json_schema: - # "cd" into the new base_uri - base_uri = urijoin(base_uri, json_schema[Keyword.ID]) validate_json_node_keys(json_schema) json_schema = self.push_sibling_keys(json_schema) From b989a0390ad533e3367c2dcd3e8b8f221a17cbb9 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 15:42:41 -0800 Subject: [PATCH 64/70] reorder properties in test cases to be consistent with the order we validate (now prioritizing base schema) --- tests/unit/library/json/test_allOf.py | 6 +++--- tests/unit/library/json/test_refs.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py index 261f40345..2802a858b 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -373,10 +373,10 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): # additionalProperties in parent schema { "allOf": [ - {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}} + 
{"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}} ], - "properties": {"bar": {"maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"]}, + "properties": {"foo": {"maximum": 4}}, + "additionalProperties": {"minimum": 5}, }, # additionalProperties in allOf { diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py index f2248129d..db695cc7e 100644 --- a/tests/unit/library/json/test_refs.py +++ b/tests/unit/library/json/test_refs.py @@ -437,14 +437,14 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( ["test_object", "valid"], [ # invalid on inner field - ({"bar": "a", "foo": {"bar": 1}}, False), + ({"foo": {"bar": 1}, "bar": "a"}, False), # invalid on outer field - ({"bar": 1, "foo": {"bar": "a"}}, False), + ({"foo": {"bar": "a"}, "bar": 1}, False), # valid on both fields ( { - "bar": "a", "foo": {"bar": "a"}, + "bar": "a", }, True, ), @@ -475,11 +475,11 @@ def test_refs_with_relative_uris_and_defs(self, test_object, valid): ["test_object", "valid"], [ # invalid on inner field - ({"bar": "a", "foo": {"bar": 1}}, False), + ({"foo": {"bar": 1}, "bar": "a"}, False), # invalid on outer field - ({"bar": 1, "foo": {"bar": "a"}}, False), + ({"foo": {"bar": "a"}, "bar": 1}, False), # valid on both fields - ({"bar": "a", "foo": {"bar": "a"}}, True), + ({"foo": {"bar": "a"}, "bar": "a"}, True), ], ) def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): From 241c47e5c36b2a8173e146bfb672e156dd9dbfa4 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 17:52:57 -0800 Subject: [PATCH 65/70] simplify enum and const validation --- guidance/library/_json.py | 49 ++++++++++------------------ tests/unit/library/json/test_json.py | 8 ++--- 2 files changed, 22 insertions(+), 35 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 5dc992c67..06a9adaed 100644 --- a/guidance/library/_json.py +++ 
b/guidance/library/_json.py @@ -1070,23 +1070,15 @@ def const( lm, *, value: Union[None, bool, int, float, str, Mapping, Sequence], - instance_type: Optional[Union[str, Sequence[str]]] = None, - enum: Optional[Sequence[Union[None, bool, int, float, str, Mapping, Sequence]]] = None, + parent_schema: JSONSchema, ): - schema_to_validate_against: dict[str, Any] = {} - if instance_type is not None: - schema_to_validate_against["type"] = instance_type - if enum is not None: - schema_to_validate_against["enum"] = enum - if schema_to_validate_against: - # Raise a validation error if the value doesn't match the type - try: - jsonschema.validate( - instance=value, - schema=schema_to_validate_against, - ) - except jsonschema.ValidationError as e: - raise UnsatisfiableSchemaError(f"const {value!r} does not match schema {schema_to_validate_against}") from e + try: + jsonschema.validate( + instance=value, + schema=parent_schema, + ) + except jsonschema.ValidationError as e: + raise UnsatisfiableSchemaError(f"const {value!r} is inconsistent with parent schema: {parent_schema}") from e # Base case if isinstance(value, (type(None), bool, int, float, str)): return lm + json_dumps(value) @@ -1122,19 +1114,19 @@ def enum( lm, *, options: Sequence[Union[None, bool, int, float, str, Mapping, Sequence]], - instance_type: Optional[Union[str, Sequence[str]]] = None, + parent_schema: JSONSchema, ): if not options: raise UnsatisfiableSchemaError("enum has no options") all_opts: list[GrammarFunction] = [] for instance in options: try: - grm = self.const(value=instance, instance_type=instance_type) + grm = self.const(value=instance, parent_schema=parent_schema) except UnsatisfiableSchemaError: continue all_opts.append(grm) if not all_opts: - raise ValueError(f"No valid options found for enum with type {instance_type!r}: {options}") + raise UnsatisfiableSchemaError(f"All enum options {options} are inconsistent with parent schema: {parent_schema}") return lm + select(options=all_opts) @@ -1183,7 
+1175,14 @@ def json( if json_schema == {}: return lm + self.any() + # Early exit for simple cases + if Keyword.CONST in json_schema: + return lm + self.const(value=json_schema[Keyword.CONST], parent_schema=json_schema) + if Keyword.ENUM in json_schema: + return lm + self.enum(options=json_schema[Keyword.ENUM], parent_schema=json_schema) + + # More complex cases; validation needed validate_json_node_keys(json_schema) json_schema = self.push_sibling_keys(json_schema) @@ -1207,18 +1206,6 @@ def json( assert not sibling_keys return lm + self.ref(reference=json_schema[Keyword.REF], base_uri=base_uri) - if Keyword.CONST in json_schema: - sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM} - if sibling_keys: - raise NotImplementedError(f"const with sibling keys is not yet supported. Got {sibling_keys}") - return lm + self.const(value=json_schema[Keyword.CONST], instance_type=json_schema.get(Keyword.TYPE, None), enum=json_schema.get(Keyword.ENUM, None)) - - if Keyword.ENUM in json_schema: - sibling_keys = get_sibling_keys(json_schema, Keyword.ENUM) - {Keyword.TYPE} - if sibling_keys: - raise NotImplementedError(f"enum with sibling keys is not yet supported. 
Got {sibling_keys}") - return lm + self.enum(options=json_schema[Keyword.ENUM], instance_type=json_schema.get(Keyword.TYPE, None)) - if Keyword.TYPE in json_schema and isinstance(json_schema[Keyword.TYPE], str): target_type = json_schema[Keyword.TYPE] if target_type == JSONType.NULL: diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 4f9b07597..c276d7dfc 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1527,7 +1527,7 @@ def test_invalid_typed_enum(self): schema_obj = {"enum": [1, "2"], "type": "boolean"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) - assert ve.value.args[0] == "No valid options found for enum with type 'boolean': [1, '2']" + assert ve.value.args[0] == f"All enum options {[1, '2']} are inconsistent with parent schema: {schema_obj}" class TestConst: @@ -1598,7 +1598,7 @@ def test_invalid_typed_const(self): schema_obj = {"const": 1, "type": "boolean"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) - assert ve.value.args[0] == "const 1 does not match schema {'type': 'boolean'}" + assert ve.value.args[0] == f"const {1!r} is inconsistent with parent schema: {schema_obj}" def test_valid_enum_const(self): schema_obj = {"const": 1, "enum": [1, 2, 3]} @@ -1610,7 +1610,7 @@ def test_invalid_enum_const(self): schema_obj = {"const": 1, "enum": [2, 3]} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) - assert ve.value.args[0] == "const 1 does not match schema {'enum': [2, 3]}" + assert ve.value.args[0] == f"const {1!r} is inconsistent with parent schema: {schema_obj}" def test_valid_typed_enum_const(self): schema_obj = {"const": 1, "enum": [1, "2", 3], "type": "integer"} @@ -1632,7 +1632,7 @@ def test_invalid_typed_enum_const(self, const): gen_json(schema=schema_obj) assert ( ve.value.args[0] - == f"const {const!r} does not match schema {{'type': 'integer', 'enum': [1, '2', 3]}}" + == f"const {const!r} is 
inconsistent with parent schema: {schema_obj}" ) From 134410c52cc04aac606bbe082551a73f4cbcb0aa Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 18:01:28 -0800 Subject: [PATCH 66/70] mypy --- guidance/library/_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 06a9adaed..932658a44 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -778,7 +778,7 @@ def oneOf( warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") return lm + self.anyOf(anyof_list=oneof_list, base_uri=base_uri) - def push_sibling_keys(self, json_schema: JSONSchema) -> JSONSchema: + def push_sibling_keys(self, json_schema: dict[str, Any]) -> dict[str, Any]: """ If sibling keys are present next to anyOf, oneOf, or $ref, we push them down into an allOf. """ @@ -1057,7 +1057,7 @@ def allOf( self, lm, *, - parent_schema: JSONSchema, + parent_schema: dict[str, Any], base_uri: str, ): reduced_schema = self.reduce_schema(parent_schema, base_uri) From 5d1c98618e87259ebee74ff7f181c31857970794 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 18:01:54 -0800 Subject: [PATCH 67/70] delete files accidentally committed --- guidance/library/_json_normalization copy.py | 94 -------------- guidance/library/_json_normalization.py | 121 ------------------- 2 files changed, 215 deletions(-) delete mode 100644 guidance/library/_json_normalization copy.py delete mode 100644 guidance/library/_json_normalization.py diff --git a/guidance/library/_json_normalization copy.py b/guidance/library/_json_normalization copy.py deleted file mode 100644 index 12c9f29c3..000000000 --- a/guidance/library/_json_normalization copy.py +++ /dev/null @@ -1,94 +0,0 @@ -from __future__ import annotations -from typing import Any, Optional, TypedDict, cast, NamedTuple -from itertools import product - -from typing import TypedDict, List, Union, 
Any, Dict - -# Unnormalized Schema Definitions - -class BaseSchema(TypedDict, total=False): - type: str - properties: Dict[str, Any] - items: Any - required: List[str] - enum: List[Any] - const: Any - minimum: int - maximum: int - minLength: int - maxLength: int - pattern: str - # Other schema keywords can be added here - -class Schema(BaseSchema): - allOf: List[Schema] - anyOf: List[Schema] - oneOf: List[Schema] - -# Normalized Schema Definitions - -class NormalizedAllOfSchema(BaseSchema): - allOf: List[BaseSchema] - -class NormalizedAnyOfSchema(TypedDict): - anyOf: List[Union[NormalizedAllOfSchema, BaseSchema]] - -class NormalizedOneOfSchema(TypedDict): - oneOf: List[Union[NormalizedAllOfSchema, BaseSchema]] - -# The NormalizedSchema can be a NormalizedBaseSchema or top-level combinators without nesting -NormalizedSchema = Union[BaseSchema, NormalizedAllOfSchema, NormalizedAnyOfSchema, NormalizedOneOfSchema] - -class Combinators(NamedTuple): - allOf: List[Schema] - anyOf: List[Schema] - oneOf: List[Schema] - -def maybe_allOf(nodes: list[BaseSchema], siblings: Optional[BaseSchema] = None) -> NormalizedSchema: - if len(nodes) == 1 and not siblings: - return nodes[0] - if siblings: - return {"allOf": [*nodes, siblings]} - return {"allOf": nodes} - -def get_combinators_and_siblings(node: Schema) -> tuple[Combinators, BaseSchema]: - allOf = cast(list[Schema], node.pop("allOf", [])) - oneOf = cast(list[Schema], node.pop("oneOf", [])) - anyOf = cast(list[Schema], node.pop("anyOf", [])) - siblings = node - return Combinators(allOf, oneOf, anyOf), siblings - -def normalize(orig_node: Schema) -> NormalizedSchema: - ((allOf_list, oneOf_list, anyOf_list), siblings) = get_combinators_and_siblings(orig_node) - if not allOf_list and not oneOf_list and not anyOf_list: - return siblings - - allOf_list = normalize_allOf(allOf_list, siblings) - anyOf_list = normalize_oneOf_anyOf(anyOf_list) - - if oneOf_list and anyOf_list: - node: NormalizedOneOfSchema = { - "oneOf": [ - 
maybe_allOf([oneOf_item, anyOf_item, *allOf_list]) - for anyOf_item in anyOf_list - for oneOf_item in oneOf_list - ] - } - elif oneOf_list: - node: NormalizedOneOfSchema = { - "oneOf": [ - maybe_allOf([oneOf_item, *allOf_list]) - for oneOf_item in oneOf_list - ] - } - elif anyOf_list: - node: NormalizedAnyOfSchema = { - "anyOf": [ - maybe_allOf([anyOf_item, *allOf_list]) - for anyOf_item in anyOf_list - ] - } - elif allOf_list: - node: NormalizedSchema = maybe_allOf(allOf_list) - - return node \ No newline at end of file diff --git a/guidance/library/_json_normalization.py b/guidance/library/_json_normalization.py deleted file mode 100644 index 448f2ea8b..000000000 --- a/guidance/library/_json_normalization.py +++ /dev/null @@ -1,121 +0,0 @@ -from __future__ import annotations -from typing import Any, Optional -from itertools import product - - -def normalize_allOf(subnodes: list[dict[str, Any]], siblings: dict[str, Any] = {}) -> dict[str, Any]: - if not subnodes: - return siblings - - # Normalization will ensure that there are no applicable "anyOf" or "oneOf" keys - # except at the top level of the schema - subnodes = [normalize(node) for node in subnodes] - groups = [] - if any("oneOf" in node for node in subnodes): - # Binds more tightly than anyOf - kind = "oneOf" - elif any("anyOf" in node for node in subnodes): - kind = "anyOf" - else: - # We are done - return maybe_allOf(subnodes, siblings) - - other = [] - if siblings: - other.append(siblings) - - for node in subnodes: - if "oneOf" in node and "anyOf" in node: - oneOf_list = node.pop("oneOf") - anyOf_list = node.pop("anyOf") - groups.append(list(product(oneOf_list, anyOf_list))) - - elif "oneOf" in node: - oneOf_list = node.pop("oneOf") - groups.append(oneOf_list) - - elif "anyOf" in node: - anyOf_list = node.pop("anyOf") - groups.append(anyOf_list) - - if "allOf" in node: - other.extend(node.pop("allOf")) - - if node: - # If there are any keys left, they need to end up in every allOf - other.append(node) - 
- return { - kind: [ - maybe_allOf([*item, *other]) - for item in product(*groups) - ] - } - -def maybe_allOf(nodes: list[dict[str, Any]], siblings: Optional[dict[str, Any]] = None) -> dict[str, Any]: - if len(nodes) == 1 and not siblings: - return nodes[0] - if siblings: - return {"allOf": [*nodes, siblings]} - return {"allOf": nodes} - - -def normalize(node: dict[str, Any]) -> dict[str, Any]: - node = normalize_allOf(node.pop("allOf", []), node) - oneOf_list = node.pop("oneOf", []) - anyOf_list = node.pop("anyOf", []) - allOf_list = node.pop("allOf", []) - - if oneOf_list and anyOf_list: - node = { - "oneOf": [ - maybe_allOf([oneOf_item, anyOf_item, *allOf_list], node) - for anyOf_item in anyOf_list - for oneOf_item in oneOf_list - ] - } - elif oneOf_list: - node = { - "oneOf": [ - maybe_allOf([oneOf_item, *allOf_list], node) - for oneOf_item in oneOf_list - ] - } - elif anyOf_list: - node = { - "anyOf": [ - maybe_allOf([anyOf_item, *allOf_list], node) - for anyOf_item in anyOf_list - ] - } - elif allOf_list: - node = maybe_allOf([node, *allOf_list]) - return node - -def normalize_oneOf_anyOf(node: dict[str, Any]) -> dict[str, Any]: - oneOf_list = node.pop("oneOf", []) - anyOf_list = node.pop("anyOf", []) - allOf_list = node.pop("allOf", []) - - if oneOf_list and anyOf_list: - node = { - "oneOf": [ - maybe_allOf([oneOf_item, anyOf_item, *allOf_list], node) - for anyOf_item in anyOf_list - for oneOf_item in oneOf_list - ] - } - elif oneOf_list: - node = { - "oneOf": [ - maybe_allOf([oneOf_item, *allOf_list], node) - for oneOf_item in oneOf_list - ] - } - elif anyOf_list: - node = { - "anyOf": [ - maybe_allOf([anyOf_item, *allOf_list], node) - for anyOf_item in anyOf_list - ] - } From 156728d04da9c941317e041d5f80a9513ee917af Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 6 Nov 2024 10:20:28 -0800 Subject: [PATCH 68/70] a few extra test cases --- tests/unit/library/json/test_allOf.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py index 2802a858b..e27d93940 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -80,6 +80,8 @@ def test_allOf_with_base_schema(self, test_object, valid): (25, True), # mismatch one (35, False), + # mismatch other + (15, False), ], ) def test_allOf_simple_types(self, test_object, valid): @@ -98,6 +100,8 @@ def test_allOf_simple_types(self, test_object, valid): @pytest.mark.parametrize( ["test_object", "valid"], [ + # mismatch both + (15, False), # mismatch one (25, False), # valid @@ -120,6 +124,8 @@ def test_allOf_simple_minimum(self, test_object, valid): @pytest.mark.parametrize( ["test_object", "valid"], [ + # mismatch both + (35, False), # mismatch one (25, False), # valid From 68900ed3a84016c99389134b17fcc30a4bfe0ac0 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 6 Nov 2024 10:25:56 -0800 Subject: [PATCH 69/70] more explicit NotImplementedError for oneOf, anyOf --- guidance/library/_json.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 932658a44..c01122edf 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -844,7 +844,18 @@ def reduce_schema(self, orig_schema: dict[str, Any], base_uri: str) -> dict[str, consts: list[Any] = [] def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri: str): - if key == Keyword.REF: + if key == Keyword.ANYOF: + raise NotImplementedError("anyOf in allOf not yet supported") + + elif key == Keyword.ONEOF: + raise NotImplementedError("oneOf in allOf not yet supported") + + elif key == Keyword.ALLOF: + value = cast(Sequence[JSONSchema], value) + for schema in value: + add_schema(schema, base_uri) + + elif key == Keyword.REF: ref = cast(str, value) abspath = urijoin(base_uri, ref) resolved = self._resolver.lookup(abspath) @@ -865,11 +876,6 @@ def 
handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri value_set = set(value) types.append(value_set) - elif key == Keyword.ALLOF: - value = cast(Sequence[JSONSchema], value) - for schema in value: - add_schema(schema, base_uri) - elif key == ObjectKeywords.PROPERTIES: value = cast(dict[str, JSONSchema], value) for name, schema in value.items(): @@ -914,10 +920,6 @@ def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri (value, exempt_prefix_items) ) - elif key in set(Keyword): - # If we've done our job right, we should never hit this case... - raise NotImplementedError(f"Don't yet know how to handle {key} in allOf") - elif key in other_data: if key in { NumberKeywords.MINIMUM, NumberKeywords.EXCLUSIVE_MINIMUM, From 0a63f592ba076cb85002bae35a62fb4cb0db2f80 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 6 Nov 2024 11:27:11 -0800 Subject: [PATCH 70/70] add some comments about dropped UnsatisfiableSchemaErrors --- guidance/library/_json.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index c01122edf..61019e16a 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -577,7 +577,9 @@ def object( # We get here if the schema is a literal False or is otherwise determined to be unsatisfiable if name in required: raise UnsatisfiableSchemaError(f"Required property {name!r} is unsatisfiable") from e - # Use json_dumps to properly quote / escape the key + # If the property is not required, we will just "blacklist" this key (e.g. if the schema was False) + # Note that we're just dropping this exception. + # Use json_dumps to properly quote / escape the key before adding it to the blacklist key = json_dumps(name) illegal_keys.add(key) @@ -592,6 +594,10 @@ def object( f"Required properties not in properties but additionalProperties is unsatisfiable." 
f" Missing required properties: {list(r for r in required if r not in properties)}" ) from e + else: + # If "additionalProperties" is unsatisfiable but there are no required properties that need to be validated against it, + # then we can safely ignore it. Note that this means that we are just going to drop this exception. + pass keys: list[str] = [] required_items: list[bool] = [] @@ -682,6 +688,11 @@ def array( f"prefixItems has too few elements ({len(prefix_items_schema)}) to satisfy minItems ({min_items})" f" but item schema is unsatisfiable" ) from e + else: + # If we've already satisfied min_items, we can just ignore the unsatisfiable item schema. This just means + # that we can't generate any more items after the prefix items. + # Note that this means that we are just going to drop this exception. + pass required_items = [] optional_items = [] @@ -697,8 +708,10 @@ def array( if i < min_items: raise UnsatisfiableSchemaError(f"prefixItems[{i}] is unsatisfiable but min_items is {min_items}") from e # Having an unsatisfiable prefix item is fine if we've already satisfied min_items, but this effectively sets max_items to i + # Note that this means that we are just going to drop this exception. max_items = i break + elif items_grammar is not None: item = items_grammar else: @@ -757,6 +770,8 @@ def anyOf( try: options.append(self.json(json_schema=item, base_uri=base_uri)) except UnsatisfiableSchemaError: + # No need to raise an error if one of the schemas is unsatisfiable. We'll check again at the end and raise if ALL + # schemas are unsatisfiable. Note that this means that we are just going to drop this exception. pass if not options: # Can't really point to any one schema that's unsatisfiable, so let's include all the schemas in the error message @@ -1125,6 +1140,8 @@ def enum( try: grm = self.const(value=instance, parent_schema=parent_schema) except UnsatisfiableSchemaError: + # Like anyOf, we don't want to raise an error if one of the options is unsatisfiable. 
We'll check again at the end + # and raise if ALL options are unsatisfiable. Note that this means that we are just going to drop this exception. continue all_opts.append(grm) if not all_opts: