diff --git a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py
index 4cc039d9d..670878c71 100644
--- a/tests/unit/library/json/test_allOf.py
+++ b/tests/unit/library/json/test_allOf.py
@@ -6,7 +6,6 @@
 from jsonschema import ValidationError, validate
 
 from guidance import json as gen_json
-
 from .utils import check_match_failure, generate_and_check
 
 
@@ -139,6 +138,7 @@ def test_allOf_simple_maximum(self, test_object, valid):
                 validate(instance=test_object, schema=schema)
             check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema)
 
+
     @pytest.mark.parametrize(
         ["test_object", "valid"],
         [
@@ -357,18 +357,15 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid):
             ({"foo": 0, "bar": 5, "baz": 4}, False),
             # invalid: baz is not an integer or null
             ({"foo": 0, "bar": 5, "baz": "quxx"}, False),
-        ],
+        ]
     )
     @pytest.mark.parametrize(
         "schema",
         [
             # The following are equivalent to this:
             {
-                "properties": {
-                    "foo": {"type": ["integer", "null"], "maximum": 4},
-                    "bar": {"minimum": 5, "maximum": 5},
-                },
-                "additionalProperties": {"type": ["integer", "null"], "minimum": 5},
+                "properties": {"foo": {"type": ["integer", "null"], "maximum": 4}, "bar": {"minimum": 5, "maximum": 5}},
+                "additionalProperties": {"type": ["integer", "null"], "minimum": 5}
             },
             # additionalProperties in parent schema
             {
@@ -376,22 +373,16 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid):
                     {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}}
                 ],
                 "properties": {"bar": {"maximum": 5}},
-                "additionalProperties": {"type": ["integer", "null"]},
+                "additionalProperties": {"type": ["integer", "null"]}
             },
             # additionalProperties in allOf
             {
                 "allOf": [
-                    {
-                        "properties": {"foo": {"maximum": 4}},
-                        "additionalProperties": {"minimum": 5},
-                    },
-                    {
-                        "properties": {"bar": {"maximum": 5}},
-                        "additionalProperties": {"type": ["integer", "null"]},
-                    },
+                    {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}},
+                    {"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}}
                 ]
             },
-        ],
+        ]
     )
     def test_additionalProperties_in_allOf(self, schema, test_object, valid):
         if valid:
@@ -405,19 +396,19 @@ def test_additionalProperties_in_allOf(self, schema, test_object, valid):
     @pytest.mark.parametrize(
         "test_object, valid",
         [
-            ({}, True),  # empty object is valid
-            ({"foo": 1}, False),  # foo is not a string
-            ({"foo": "x"}, False),  # foo is not an integer
-            ({"foo": True}, False),  # foo is not a string or an integer
-        ],
+            ({}, True), # empty object is valid
+            ({"foo": 1}, False), # foo is not a string
+            ({"foo": "x"}, False), # foo is not an integer
+            ({"foo": True}, False), # foo is not a string or an integer
+        ]
     )
     def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid):
         schema = {
             "type": "object",
             "allOf": [
                 {"additionalProperties": {"type": "integer"}},
-                {"additionalProperties": {"type": "string"}},
-            ],
+                {"additionalProperties": {"type": "string"}}
+            ]
         }
         if valid:
             validate(instance=test_object, schema=schema)
@@ -449,18 +440,15 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid):
             ([0, 5, 4], False),
             # invalid: baz is not an integer or null
             ([0, 5, "quxx"], False),
-        ],
+        ]
     )
     @pytest.mark.parametrize(
         "schema",
         [
             # The following are equivalent to this:
             {
-                "prefixItems": [
-                    {"type": ["integer", "null"], "maximum": 4},
-                    {"minimum": 5, "maximum": 5},
-                ],
-                "items": {"type": ["integer", "null"], "minimum": 5},
+                "prefixItems": [{"type": ["integer", "null"], "maximum": 4}, {"minimum": 5, "maximum": 5}],
+                "items": {"type": ["integer", "null"], "minimum": 5}
             },
             # items in parent schema
             {
@@ -468,19 +456,17 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid):
                     {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}},
                 ],
                 "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}],
-                "items": {"type": ["integer", "null"]},
+                "items": {"type": ["integer", "null"]}
+            },
             # items in allOf
             {
                 "allOf": [
                     {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}},
-                    {
-                        "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}],
-                        "items": {"type": ["integer", "null"]},
-                    },
+                    {"prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], "items": {"type": ["integer", "null"]}}
                 ]
             },
-        ],
+        ]
     )
     def test_items_and_prefixitems_in_allOf(self, schema, test_object, valid):
         if valid:
diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py
index 3a3b553d9..b7a663a91 100644
--- a/tests/unit/library/json/test_json.py
+++ b/tests/unit/library/json/test_json.py
@@ -1,22 +1,22 @@
 import json
-from json import dumps as json_dumps
 
 import pytest
 import re
 from jsonschema import ValidationError, validate
+from json import dumps as json_dumps
 
 from guidance import json as gen_json
 from guidance import models
-from guidance.library._json import IGNORED_KEYS
+from guidance.library._json import IGNORED_KEYS 
 
 from .utils import check_match_failure, generate_and_check
 
+
 # Common sets of allowed_bytes
 INTEGER_LEADING = {b"-", b"0", *{bytes([i]) for i in range(ord("1"), ord("9") + 1)}}
 INTEGER_FOLLOWING = {bytes([i]) for i in range(ord("0"), ord("9") + 1)}
 A_to_Z = {bytes([i]) for i in range(ord("A"), ord("Z") + 1)}
 
-
 def test_null():
     schema = """{"type": "null" }"""
@@ -171,15 +171,11 @@ class TestBoundedNumeric:
             (-5, {"type": "integer", "minimum": -5}, True),
             pytest.param(
                 *(5.0, {"type": "integer", "minimum": 5}, True),
-                marks=pytest.mark.xfail(
-                    reason="JSON technically allows trailing zeroes, but we currently don't"
-                ),
+                marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't")
             ),
             pytest.param(
                 *(-5.0, {"type": "integer", "minimum": -5}, True),
-                marks=pytest.mark.xfail(
-                    reason="JSON technically allows trailing zeroes, but we currently don't"
-                ),
+                marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't")
             ),
             (5.1, {"type": "integer", "minimum": 5}, False),
             (-5.1, {"type": "integer", "minimum": -5}, False),
@@ -239,11 +235,7 @@ class TestBoundedNumeric:
             (5.1, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True),
             (-9.9, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True),
             (5.0, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, False),
-            (
-                -10.0,
-                {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0},
-                False,
-            ),
+            (-10.0, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, False),
             (9.9, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True),
             (-5.1, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True),
             # --- Edge cases ---
@@ -284,10 +276,10 @@ class TestBoundedNumeric:
             (0.2999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, True),
             (-0.2999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, True),
             (0.0999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False),
-            (-0.0999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False),
+            (-0.0999, {"type": "number", "minimum": -.3, "maximum": -0.1}, False),
             (0.3001, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False),
             (-0.3001, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False),
-        ],
+        ]
     )
     def test_numeric_validation(self, instance, schema, should_pass):
         # Sanity check
@@ -297,7 +289,10 @@ def test_numeric_validation(self, instance, schema, should_pass):
         else:
             with pytest.raises(ValidationError):
                 validate(instance, schema=schema)
-            check_match_failure(bad_string=json_dumps(instance), schema_obj=schema)
+            check_match_failure(
+                bad_string=json_dumps(instance),
+                schema_obj=schema
+            )
 
 
 class TestString:
@@ -378,7 +373,9 @@ def test_regex_bad(self, bad_string: str, good_bytes, failure_byte, allowed_byte
             schema_obj=schema_obj,
         )
 
-    @pytest.mark.parametrize("string", ["aA\u001f", '"""'])
+    @pytest.mark.parametrize(
+        "string", ["aA\u001f", '"""']
+    )
     def test_regex_properly_escaped_good(self, string):
         schema_obj = {"type": "string", "pattern": r".{3}"}
         # First sanity check what we're setting up
@@ -391,15 +388,13 @@ def test_regex_properly_escaped_good(self, string):
         [
             (
                 '"\\u001f\\u001f\u001f',
-                b'"\\u001f\\u001f',  # able to match the first two stringified bytes
-                "\u001f".encode(),  # fails on a literal \x1f byte
-                None,  # hard to write a set of allowed bytes here
+                b'"\\u001f\\u001f', # able to match the first two stringified bytes
+                '\u001f'.encode(), # fails on a literal \x1f byte
+                None # hard to write a set of allowed bytes here
             ),
         ],
     )
-    def test_regex_properly_escaped_bad(
-        self, bad_string: str, good_bytes, failure_byte, allowed_bytes
-    ):
+    def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_byte, allowed_bytes):
         # Note that the strings being fed in include the double quotes required
         # to make them JSON strings
         schema_obj = {"type": "string", "pattern": r".{3}"}
@@ -411,6 +406,7 @@ def test_regex_properly_escaped_bad(
             schema_obj=schema_obj,
         )
 
+
     @pytest.mark.parametrize(
         "my_string", ["a", "bb", "ccc", "150", ",?", ".\t\n", "(){", "aA7", "\\9O"]
     )
@@ -753,31 +749,25 @@ def test_required_is_required(self):
         generate_and_check({"b": 1}, schema)
         generate_and_check({"a": 1, "b": "xyz"}, schema)
         check_match_failure(
-            bad_string=json_dumps({"a": 1}),
+            bad_string=json_dumps(
+                {"a": 1}
+            ),
             schema_obj=schema,
         )
 
     def test_validated_against_additionalProperties(self):
-        schema = {
-            "type": "object",
-            "properties": {"a": {"type": "integer"}},
-            "required": ["b"],
-            "additionalProperties": {"type": "integer"},
-        }
+        schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b"], "additionalProperties": {"type": "integer"}}
         generate_and_check({"b": 1}, schema)
         generate_and_check({"a": 1, "b": 42}, schema)
         check_match_failure(
-            bad_string=json_dumps({"a": 1, "b": "string"}),
+            bad_string=json_dumps(
+                {"a": 1, "b": "string"}
+            ),
             schema_obj=schema,
         )
 
     def test_false_additionalProperties_fails(self):
-        schema = {
-            "type": "object",
-            "properties": {"a": {"type": "integer"}},
-            "required": ["b", "c"],
-            "additionalProperties": False,
-        }
+        schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b", "c"], "additionalProperties": False}
         with pytest.raises(ValueError) as ve:
             _ = gen_json(schema=schema)
         assert (
@@ -850,6 +840,7 @@ def test_object_list(self, target_obj, temperature):
         # The actual check
         generate_and_check(target_obj, schema_obj, desired_temperature=temperature)
 
+
     @pytest.mark.parametrize(
         ["bad_string", "good_bytes", "failure_byte", "allowed_bytes"],
         [
@@ -1010,6 +1001,7 @@ def test_good_with_items(self, min_items, max_items, target_obj):
         }
         generate_and_check(target_obj, schema_obj)
 
+
     @pytest.mark.parametrize(
         "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
@@ -1090,6 +1082,7 @@ def test_bad_with_prefix_and_items(
             schema_obj=schema_obj,
         )
 
+
     @pytest.mark.parametrize(
         "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
@@ -1154,6 +1147,7 @@ def test_bad_with_prefix(
             schema_obj=schema_obj,
         )
 
+
     @pytest.mark.parametrize(
         "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
@@ -1370,12 +1364,13 @@ def test_allOf_ref(self):
         generate_and_check(target_obj, schema_obj)
 
     def test_allOf_bad_schema(self):
-        schema = {"allOf": [{"type": "integer"}, {"type": "string"}]}
+        schema = {
+            "allOf" : [{ "type": "integer" }, { "type": "string" }]
+        }
         with pytest.raises(ValueError) as ve:
             _ = gen_json(schema=schema)
         assert ve.value.args[0] == "allOf has conflicting types: [{'integer'}, {'string'}]"
 
-
 class TestOneOf:
     @pytest.mark.parametrize("target_obj", [123, 42])
     def test_oneOf_simple(self, target_obj):
@@ -1390,6 +1385,7 @@ def test_oneOf_simple(self, target_obj):
         # The actual check
         generate_and_check(target_obj, schema_obj)
 
+
     @pytest.mark.parametrize("target_obj", [123, True])
     def test_oneOf_compound(self, target_obj):
         schema = """{
@@ -1427,6 +1423,7 @@ def test_enum(self, target_obj, temperature):
         # The actual check
         generate_and_check(target_obj, schema_obj, desired_temperature=temperature)
 
+
     @pytest.mark.parametrize(
         "bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
@@ -1446,6 +1443,7 @@ def test_bad_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes):
             schema_obj=schema_obj,
         )
 
+
     @pytest.mark.parametrize(
         "bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
@@ -1473,10 +1471,13 @@ def test_bad_prefix_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes)
             ("2", False),
             ("1", False),
             (True, False),
-        ],
+        ]
     )
     def test_typed_enum_single_type(self, obj, valid):
-        schema_obj = {"enum": [1, "2", True], "type": "integer"}
+        schema_obj = {
+            "enum": [1, "2", True],
+            "type": "integer"
+        }
         if valid:
             validate(instance=obj, schema=schema_obj)
             generate_and_check(obj, schema_obj)
@@ -1493,10 +1494,13 @@ def test_typed_enum_single_type(self, obj, valid):
             ("2", True),
             ("1", False),
             (True, False),
-        ],
+        ]
     )
     def test_typed_enum_multiple_types(self, obj, valid):
-        schema_obj = {"enum": [1, "2", True], "type": ["integer", "string"]}
+        schema_obj = {
+            "enum": [1, "2", True],
+            "type": ["integer", "string"]
+        }
         if valid:
             validate(instance=obj, schema=schema_obj)
             generate_and_check(obj, schema_obj)
@@ -1506,12 +1510,14 @@ def test_typed_enum_multiple_types(self, obj, valid):
             check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj)
 
     def test_invalid_typed_enum(self):
-        schema_obj = {"enum": [1, "2"], "type": "boolean"}
+        schema_obj = {
+            "enum": [1, "2"],
+            "type": "boolean"
+        }
         with pytest.raises(ValueError) as ve:
             gen_json(schema=schema_obj)
         assert ve.value.args[0] == "No valid options found for enum with type 'boolean': [1, '2']"
 
-
 class TestConst:
     def test_constant_int(self):
         # First sanity check what we're setting up
@@ -1571,29 +1577,45 @@ def test_constant_precedence(self):
         )
 
     def test_valid_typed_const(self):
-        schema_obj = {"const": 1, "type": "integer"}
+        schema_obj = {
+            "const": 1,
+            "type": "integer"
+        }
         target_obj = 1
         validate(instance=target_obj, schema=schema_obj)
         generate_and_check(target_obj, schema_obj)
 
     def test_invalid_typed_const(self):
-        schema_obj = {"const": 1, "type": "boolean"}
+        schema_obj = {
+            "const": 1,
+            "type": "boolean"
+        }
         with pytest.raises(ValidationError):
             gen_json(schema=schema_obj)
 
     def test_valid_enum_const(self):
-        schema_obj = {"const": 1, "enum": [1, 2, 3]}
+        schema_obj = {
+            "const": 1,
+            "enum": [1, 2, 3]
+        }
         target_obj = 1
         validate(instance=target_obj, schema=schema_obj)
         generate_and_check(target_obj, schema_obj)
 
     def test_invalid_enum_const(self):
-        schema_obj = {"const": 1, "enum": [2, 3]}
+        schema_obj = {
+            "const": 1,
+            "enum": [2, 3]
+        }
         with pytest.raises(ValidationError):
             gen_json(schema=schema_obj)
 
     def test_valid_typed_enum_const(self):
-        schema_obj = {"const": 1, "enum": [1, "2", 3], "type": "integer"}
+        schema_obj = {
+            "const": 1,
+            "enum": [1, "2", 3],
+            "type": "integer"
+        }
         target_obj = 1
         validate(instance=target_obj, schema=schema_obj)
         generate_and_check(target_obj, schema_obj)
@@ -1601,13 +1623,17 @@ def test_valid_typed_enum_const(self):
     @pytest.mark.parametrize(
         "const",
         [
-            "2",  # right enum, wrong type
-            2,  # wrong enum, right type
-            "3",  # wrong enum, wrong type
-        ],
+            "2", # right enum, wrong type
+            2, # wrong enum, right type
+            "3", # wrong enum, wrong type
+        ]
     )
     def test_invalid_typed_enum_const(self, const):
-        schema_obj = {"const": const, "enum": [1, "2", 3], "type": "integer"}
+        schema_obj = {
+            "const": const,
+            "enum": [1, "2", 3],
+            "type": "integer"
+        }
         with pytest.raises(ValidationError):
             gen_json(schema=schema_obj)
@@ -1655,15 +1681,11 @@ def test_simple_additional_properties(self, target_obj, temperature):
         # The actual check
         generate_and_check(target_obj, schema_obj, desired_temperature=temperature)
 
+
     @pytest.mark.parametrize(
         "bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
-            (
-                {"a": "1"},
-                b'{"a": ',
-                b'"',
-                INTEGER_LEADING,
-            ),
+            ({"a": "1"}, b'{"a": ', b'"', INTEGER_LEADING, ),
             (
                 {"a": 1, "b": 1.5},
                 b'{"a": 1, "b": 1',
@@ -1683,7 +1705,9 @@ def test_simple_bad_type(self, bad_obj, good_bytes, failure_byte, allowed_bytes)
             schema_obj=schema_obj,
         )
 
-    @pytest.mark.parametrize("target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}])
+    @pytest.mark.parametrize(
+        "target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}]
+    )
     def test_anyOf_additional_properties(self, target_obj):
         # First sanity check what we're setting up
         schema_obj = json.loads(self.anyOf_schema)
@@ -1692,6 +1716,7 @@ def test_anyOf_additional_properties(self, target_obj):
         # The actual check
         generate_and_check(target_obj, schema_obj)
 
+
     @pytest.mark.parametrize(
         "bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
@@ -1733,6 +1758,7 @@ def test_properties_and_additional_properties(self, target_obj, temperature):
         # The actual check
         generate_and_check(target_obj, schema_obj, desired_temperature=temperature)
 
+
     @pytest.mark.parametrize(
         "bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
@@ -1741,7 +1767,9 @@ def test_properties_and_additional_properties(self, target_obj, temperature):
             ({"a": 1, "b": 2}, b'{"', b"a", {b"m"}),
         ],
     )
-    def test_combined_missing_properties(self, bad_obj, good_bytes, failure_byte, allowed_bytes):
+    def test_combined_missing_properties(
+        self, bad_obj, good_bytes, failure_byte, allowed_bytes
+    ):
         schema_obj = json.loads(self.combined_schema)
         bad_string = json_dumps(bad_obj)
         check_match_failure(
@@ -1752,6 +1780,7 @@ def test_combined_missing_properties(self, bad_obj, good_bytes, failure_byte, al
             schema_obj=schema_obj,
         )
 
+
     @pytest.mark.parametrize(
         "bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
@@ -1880,6 +1909,7 @@ def test_empty_schema(self, target_obj, temperature):
         # The actual check
         generate_and_check(target_obj, schema_obj, desired_temperature=temperature)
 
+
     @pytest.mark.parametrize(
         "bad_string, good_bytes, failure_byte, allowed_bytes",
         [
@@ -1908,7 +1938,9 @@ def test_empty_schema(self, target_obj, temperature):
             ),
         ],
     )
-    def test_bad_empty_schema(self, bad_string, good_bytes, failure_byte, allowed_bytes):
+    def test_bad_empty_schema(
+        self, bad_string, good_bytes, failure_byte, allowed_bytes
+    ):
         schema_obj = json.loads(self.empty_schema)
         check_match_failure(
             bad_string=bad_string,
@@ -1924,12 +1956,7 @@ def test_bad_empty_schema(self, bad_string, good_bytes, failure_byte, allowed_by
             # Empty property
             {"type": "object", "properties": {"a": {}}, "required": ["a"]},
             # Empty reference
-            {
-                "type": "object",
-                "properties": {"a": {"$ref": "#/$defs/A"}},
-                "$defs": {"A": {}},
-                "required": ["a"],
-            },
+            {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]},
         ],
     )
     @pytest.mark.parametrize(
@@ -1960,14 +1987,10 @@ def test_nested_empty_schema(self, schema_obj, target_obj, temperature):
             # Empty property
             {"type": "object", "properties": {"a": {}}, "required": ["a"]},
             # Empty reference
-            {
-                "type": "object",
-                "properties": {"a": {"$ref": "#/$defs/A"}},
-                "$defs": {"A": {}},
-                "required": ["a"],
-            },
+            {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]},
         ],
     )
+
     @pytest.mark.parametrize(
         "bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
@@ -2010,6 +2033,7 @@ def test_nested_empty_schema_with_props(self, target_obj, temperature):
         # The actual check
         generate_and_check(target_obj, schema_obj, desired_temperature=temperature)
 
+
     @pytest.mark.parametrize(
         "bad_obj, good_bytes, failure_byte, allowed_bytes",
         [
@@ -2044,6 +2068,7 @@ def test_items(self, schema_obj):
             [1, 0.4, "hello", False, None, {"a": 42}, [1, 2, 3, "four"]], schema_obj
         )
 
+
     def test_no_items(self):
         schema_obj = {"type": "array", "items": False}
         check_match_failure(
@@ -2076,6 +2101,7 @@ def test_additionalProperties(self, schema_obj):
             schema_obj,
         )
 
+
     def test_no_additionalProperties(self):
         schema_obj = {"type": "object", "additionalProperties": False}
         check_match_failure(
@@ -2086,17 +2112,17 @@ def test_no_additionalProperties(self):
             schema_obj=schema_obj,
         )
 
-
 def test_ignored_keys_allowed_as_properties():
     schema_obj = {
         "type": "object",
-        "properties": {key: {"type": "string"} for key in IGNORED_KEYS},
+        "properties": {
+            key: {"type": "string"} for key in IGNORED_KEYS
+        },
         "required": list(IGNORED_KEYS),
     }
     target_obj = {key: "value" for key in IGNORED_KEYS}
     generate_and_check(target_obj, schema_obj)
 
-
 class TestRequiredProperties:
     schema_obj = {
@@ -2105,19 +2131,10 @@ class TestRequiredProperties:
             "b": {"type": "number"},
             "c": {"type": "boolean"},
         },
-        "additionalProperties": True,
+        "additionalProperties": True
     }
     ALL_REQUIRED = ["a", "b", "c"]
-    SOME_REQUIRED_SUBSETS = [
-        [],
-        ["a"],
-        ["b"],
-        ["c"],
-        ["a", "b"],
-        ["a", "c"],
-        ["b", "c"],
-        ["a", "b", "c"],
-    ]
+    SOME_REQUIRED_SUBSETS = [[], ["a"], ["b"], ["c"], ["a", "b"], ["a", "c"], ["b", "c"], ["a", "b", "c"]]
     NONE_REQUIRED: list[str] = []
 
     @pytest.mark.parametrize(
@@ -2126,7 +2143,7 @@ class TestRequiredProperties:
             {},
             {"d": "hello"},
             {"d": 42, "e": True},
-        ],
+        ]
     )
     def test_all_required_good(self, extra_items):
         schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED}
@@ -2146,7 +2163,7 @@ def test_all_required_good(self, extra_items):
             ({"c": True}),
             # Missing all
             ({}),
-        ],
+        ]
     )
     def test_all_required_bad(self, bad_obj):
         schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED}
@@ -2161,7 +2178,7 @@
             {},
             {"d": "hello"},
             {"d": 42, "e": True},
-        ],
+        ]
     )
     @pytest.mark.parametrize(
         "required",
@@ -2199,7 +2216,7 @@ def test_some_required_bad(self, required):
             {},
             {"d": "hello"},
             {"d": 42, "e": True},
-        ],
+        ]
     )
     @pytest.mark.parametrize(
         "target_obj",
@@ -2212,48 +2229,55 @@ def test_some_required_bad(self, required):
             {"a": "hello", "c": True},
             {"b": 42, "c": True},
             {"a": "hello", "b": 42, "c": True},
-        ],
+        ]
     )
     def test_none_required(self, target_obj, extra_items):
         schema_obj = {**self.schema_obj, "required": self.NONE_REQUIRED}
         generate_and_check({**target_obj, **extra_items}, schema_obj)
 
-
 class TestRequiredPropertiesScaling:
-    @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100])
+    @pytest.mark.parametrize(
+        "num_properties",
+        [1, 2, 3, 4, 5, 10, 20, 50, 100]
+    )
     def test_many_optional_properties_doesnt_blow_up(self, num_properties):
         schema_obj = {
             "type": "object",
-            "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)},
-            "required": [],  # Empty should be worst-case scenario
+            "properties": {
+                f"prop_{i}": {"type": "string"} for i in range(num_properties)
+            },
+            "required": [] # Empty should be worst-case scenario
         }
         from guidance.library._json import GenJson
-
         genjson = GenJson(schema=schema_obj)
         genjson._join.__wrapped__.cache_clear()
         _ = genjson.root()
         cache_info = genjson._join.__wrapped__.cache_info()
 
         # Theoretical number of cache misses under the current implementation
-        expected_misses = 2 * num_properties - 1
-        MISSES_MAGIC_NUMBER = 5  # Where in the world is this coming from?
+        expected_misses = 2*num_properties - 1
+        MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from?
         assert 0 < cache_info.misses <= expected_misses + MISSES_MAGIC_NUMBER
         # NOTE: that if the cache maxsize is hit, the number of misses will be more than expected
 
         # Theoretical number of total calls under the current implementation
-        expected_calls = num_properties * (num_properties - 1) // 2
-        CALLS_MAGIC_NUMBER = 12  # Where in the world is this coming from?
+        expected_calls = num_properties*(num_properties - 1) // 2
+        CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from?
         assert 0 < cache_info.hits + cache_info.misses <= expected_calls + CALLS_MAGIC_NUMBER
 
-    @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100])
+    @pytest.mark.parametrize(
+        "num_properties",
+        [1, 2, 3, 4, 5, 10, 20, 50, 100]
+    )
     def test_all_required_properties_doesnt_blow_up(self, num_properties):
         schema_obj = {
             "type": "object",
-            "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)},
-            "required": [f"prop_{i}" for i in range(num_properties)],
+            "properties": {
+                f"prop_{i}": {"type": "string"} for i in range(num_properties)
+            },
+            "required": [f"prop_{i}" for i in range(num_properties)]
        }
         from guidance.library._json import GenJson
-
         genjson = GenJson(schema=schema_obj)
         genjson._join.__wrapped__.cache_clear()
         _ = genjson.root()
@@ -2281,7 +2305,7 @@ class TestBooleanSchema:
             {"a": [1, 2, 3]},
             {"a": {"b": 1}},
             False,
-            True,
+            True
         ],
     )
     def test_true_schema(self, target_obj):
@@ -2319,7 +2343,7 @@ class TestWhitespace:
             ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}),
             # Static object: const (both item and key seps)
             ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}),
-        ],
+        ]
     )
     @pytest.mark.parametrize(
         "separators",
@@ -2345,7 +2369,7 @@ def test_separators(self, separators, schema, obj):
             ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}),
             # Static object: const (both item and key seps)
             ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}),
-        ],
+        ]
     )
     @pytest.mark.parametrize(
         "separators",
diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py
index f2248129d..fd1136058 100644
--- a/tests/unit/library/json/test_refs.py
+++ b/tests/unit/library/json/test_refs.py
@@ -1,10 +1,9 @@
-from json import dumps as json_dumps
-
 import pytest
 from jsonschema import ValidationError, validate
 
-from .utils import check_match_failure, generate_and_check
+from json import dumps as json_dumps
 
+from .utils import check_match_failure, generate_and_check
 
 class TestRefs:
     @pytest.mark.parametrize(
@@ -439,15 +438,9 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct(
             # invalid on inner field
             ({"bar": "a", "foo": {"bar": 1}}, False),
             # invalid on outer field
-            ({"bar": 1, "foo": {"bar": "a"}}, False),
+            ({ "bar": 1, "foo": {"bar": "a"}}, False),
             # valid on both fields
-            (
-                {
-                    "bar": "a",
-                    "foo": {"bar": "a"},
-                },
-                True,
-            ),
+            ({"bar": "a", "foo": {"bar": "a"}, }, True),
         ],
     )
     def test_refs_with_relative_uris_and_defs(self, test_object, valid):
@@ -981,4 +974,4 @@ def test_empty_tokens_in_ref_json_pointer(self, test_object, valid):
         else:
             with pytest.raises(ValidationError):
                 validate(instance=test_object, schema=schema)
-            check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema)
+            check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema)
\ No newline at end of file
diff --git a/tests/unit/library/json/test_string_format.py b/tests/unit/library/json/test_string_format.py
index 7b2dd9bdc..09712fb45 100644
--- a/tests/unit/library/json/test_string_format.py
+++ b/tests/unit/library/json/test_string_format.py
@@ -1,8 +1,7 @@
 """Adapted from https://github.com/json-schema-org/JSON-Schema-Test-Suite/tree/9fc880bfb6d8ccd093bc82431f17d13681ffae8e/tests/draft2020-12/optional/format"""
 
-import json
-
 import pytest
+import json
 
 from .utils import check_match_failure, generate_and_check
@@ -45,35 +44,17 @@ def test_good(self, target_str):
         "bad_str",
         [
             '"2020-01-32"',  # a invalid date string with 32 days in January
-            pytest.param(
-                '"2021-02-29"',
-                marks=pytest.mark.xfail(reason="number of days not yet tied to month"),
-            ),  # a invalid date string with 29 days in February (normal)
-            pytest.param(
-                '"2020-02-30"',
-                marks=pytest.mark.xfail(reason="number of days not yet tied to month"),
-            ),  # a invalid date string with 30 days in February (leap)
+            pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 29 days in February (normal)
+            pytest.param('"2020-02-30"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 30 days in February (leap)
             '"2020-03-32"',  # a invalid date string with 32 days in March
-            pytest.param(
-                '"2020-04-31"',
-                marks=pytest.mark.xfail(reason="number of days not yet tied to month"),
-            ),  # a invalid date string with 31 days in April
+            pytest.param('"2020-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in April
             '"2020-05-32"',  # a invalid date string with 32 days in May
-            pytest.param(
-                '"2020-06-31"',
-                marks=pytest.mark.xfail(reason="number of days not yet tied to month"),
-            ),  # a invalid date string with 31 days in June
+            pytest.param('"2020-06-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in June
             '"2020-07-32"',  # a invalid date string with 32 days in July
             '"2020-08-32"',  # a invalid date string with 32 days in August
-            pytest.param(
-                '"2020-09-31"',
-                marks=pytest.mark.xfail(reason="number of days not yet tied to month"),
-            ),  # a invalid date string with 31 days in September
+            pytest.param('"2020-09-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in September
             '"2020-10-32"',  # a invalid date string with 32 days in October
-            pytest.param(
-                '"2020-11-31"',
-                marks=pytest.mark.xfail(reason="number of days not yet tied to month"),
-            ),  # a invalid date string with 31 days in November
+            pytest.param('"2020-11-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in November
             '"2020-12-32"',  # a invalid date string with 32 days in December
             '"2020-13-01"',  # a invalid date string with invalid month
             '"06/19/1963"',  # an invalid date string
@@ -81,13 +62,8 @@ def test_good(self, target_str):
             '"1998-1-20"',  # non-padded month dates are not valid
             '"1998-01-1"',  # non-padded day dates are not valid
             '"1998-13-01"',  # invalid month
-            pytest.param(
-                '"1998-04-31"',
-                marks=pytest.mark.xfail(reason="number of days not yet tied to month"),
-            ),  # invalid month-day combination
-            pytest.param(
-                '"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard")
-            ),  # 2021 is not a leap year
+            pytest.param('"1998-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # invalid month-day combination
+            pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard")), # 2021 is not a leap year
             '"1963-06-1\\u09ea"',  # invalid non-ASCII '৪' (a Bengali 4)
             '"20230328"',  # ISO8601 / non-RFC3339: YYYYMMDD without dashes (2023-03-28)
             '"2023-W01"',  # ISO8601 / non-RFC3339: week number implicit day of week (2023-01-02)
@@ -161,7 +137,6 @@ def test_bad(self, bad_str):
         schema_obj = json.loads(self.schema)
         check_match_failure(bad_string=bad_str, schema_obj=schema_obj)
 
-
 @pytest.mark.xfail(reason="idn-hostname format not implemented")
 class TestIdnHostname:
     schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"idn-hostname"}'
@@ -325,7 +300,6 @@ def test_bad(self, bad_str):
         schema_obj = json.loads(self.schema)
         check_match_failure(bad_string=bad_str, schema_obj=schema_obj)
 
-
 @pytest.mark.xfail(reason="iri-reference format is not yet implemented")
 class TestIriReference:
     schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"iri-reference"}'
@@ -515,40 +489,20 @@ def test_good(self, target_str):
             '"008:030:006Z"',  # invalid time string with extra leading zeros
             '"8:3:6Z"',  # invalid time string with no leading zero for single digit
             '"8:0030:6Z"',  # hour, minute, second must be two digits
-            pytest.param(
-                '"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # invalid leap second, Zulu (wrong hour)
-            pytest.param(
-                '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # invalid leap second, Zulu (wrong minute)
-            pytest.param(
-                '"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # invalid leap second, zero time-offset (wrong hour)
-            pytest.param(
-                '"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # invalid leap second, zero time-offset (wrong minute)
-            pytest.param(
-                '"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # invalid leap second, positive time-offset (wrong hour)
-            pytest.param(
-                '"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # invalid leap second, positive time-offset (wrong minute)
-            pytest.param(
-                '"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # invalid leap second, negative time-offset (wrong hour)
-            pytest.param(
-                '"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # invalid leap second, negative time-offset (wrong minute)
+            pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong hour)
+            pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong minute)
+            pytest.param('"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong hour)
+            pytest.param('"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong minute)
+            pytest.param('"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong hour)
+            pytest.param('"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong minute)
+            pytest.param('"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong hour)
+            pytest.param('"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong minute)
             '"08:30:06-8:000"',  # hour, minute in time-offset must be two digits
             '"24:00:00Z"',  # an invalid time string with invalid hour
             '"00:60:00Z"',  # an invalid time string with invalid minute
             '"00:00:61Z"',  # an invalid time string with invalid second
-            pytest.param(
-                '"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # an invalid time string with invalid leap second (wrong hour)
-            pytest.param(
-                '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # an invalid time string with invalid leap second (wrong minute)
+            pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong hour)
+            pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong minute)
             '"01:02:03+24:00"',  # an invalid time string with invalid time numoffset hour
             '"01:02:03+00:60"',  # an invalid time string with invalid time numoffset minute
             '"01:02:03Z+00:30"',  # an invalid time string with invalid time with both Z and numoffset
@@ -584,23 +538,11 @@ class TestIpv6:
             '"::42:ff:1"',  # leading colons is valid
             '"d6::"',  # trailing colons is valid
             '"1:d6::42"',  # single set of double colons in the middle is valid
-            pytest.param(
-                '"1::d6:192.168.0.1"',
-                marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"),
-            ),  # mixed format with the ipv4 section as decimal octets
-            pytest.param(
-                '"1:2::192.168.0.1"',
-                marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"),
-            ),  # mixed format with double colons between the sections
-            pytest.param(
-                '"::ffff:192.168.0.1"',
-                marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"),
-            ),  # mixed format with leading double colons (ipv4-mapped ipv6 address)
+            pytest.param('"1::d6:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with the ipv4 section as decimal octets
+            pytest.param('"1:2::192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with double colons between the sections
+            pytest.param('"::ffff:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with leading double colons (ipv4-mapped ipv6 address)
             '"1:2:3:4:5:6:7:8"',  # 8 octets
-            pytest.param(
-                '"1000:1000:1000:1000:1000:1000:255.255.255.255"',
-                marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"),
-            ),  # a long valid ipv6
+            pytest.param('"1000:1000:1000:1000:1000:1000:255.255.255.255"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # a long valid ipv6
         ],
     )
     def test_good(self, target_str):
@@ -768,22 +710,11 @@ class TestEmail:
             '"te~st@example.com"',  # tilde in local part is valid
             '"~test@example.com"',  # tilde before local part is valid
             '"test~@example.com"',  # tilde after local part is valid
-            pytest.param(
-                '"\\"joe bloggs\\"@example.com"',
-                marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"),
-            ),  # a quoted string with a space in the local part is valid
-            pytest.param(
-                '"\\"joe..bloggs\\"@example.com"',
-                marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"),
-            ),  # a quoted string with a double dot in the local part is valid
-            pytest.param(
-                '"\\"joe@bloggs\\"@example.com"',
-                marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"),
-            ),  # a quoted string with a @ in the local part is valid
+            pytest.param('"\\"joe bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a space in the local part is valid
+            pytest.param('"\\"joe..bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a double dot in the local part is valid
+            pytest.param('"\\"joe@bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a @ in the local part is valid
             '"joe.bloggs@[127.0.0.1]"',  # an IPv4-address-literal after the @ is valid
-            pytest.param(
-                '"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard")
-            ),  # an IPv6-address-literal after the @ is valid
+            pytest.param('"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard")), # an IPv6-address-literal after the @ is valid
             '"te.s.t@example.com"',  # two separated dots inside local part are valid
             '"riedgar+guidance@example.com"',  # plus sign in local part is valid
         ],
@@ -929,16 +860,9 @@ def test_good(self, target_str):
         "bad_str",
         [
             '"1998-12-31T23:59:61Z"',  # an invalid date-time past leap second, UTC
-            pytest.param(
-                '"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # an invalid date-time with leap second on a wrong minute, UTC
-            pytest.param(
-                '"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")
-            ),  # an invalid date-time with leap second on a wrong hour, UTC
-            pytest.param(
-                '"1990-02-31T15:59:59.123-08:00"',
-                marks=pytest.mark.xfail(reason="valid days not yet tied to month"),
-            ),  # an invalid day in date-time string
+            pytest.param('"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong minute, UTC
+            pytest.param('"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong hour, UTC
+            pytest.param('"1990-02-31T15:59:59.123-08:00"', marks=pytest.mark.xfail(reason="valid days not yet tied to month")), # an invalid day in date-time string
             '"1990-12-31T15:59:59-24:00"',  # an invalid offset in date-time string
             '"1963-06-19T08:30:06.28123+01:00Z"',  # an invalid closing Z after time-zone offset
             '"06/19/1963 08:30:06 PST"',  # an invalid date-time string
@@ -953,7 +877,6 @@ def test_bad(self, bad_str):
         schema_obj = json.loads(self.schema)
         check_match_failure(bad_string=bad_str, schema_obj=schema_obj)
 
-
 @pytest.mark.xfail(reason="regex format not implemented")
 class TestRegex:
     schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"regex"}'
diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py
index 5498d718c..d75c41d4b 100644
--- a/tests/unit/library/json/utils.py
+++ b/tests/unit/library/json/utils.py
@@ -1,7 +1,6 @@
 import json
 from functools import partial
-from json import dumps as json_dumps
-from json import loads as json_loads
+from json import loads as json_loads, dumps as json_dumps
 from typing import Any, Optional, Union
 
 from jsonschema import validate
@@ -9,15 +8,18 @@
 from guidance import json as gen_json
 from guidance.library._json import JSONSchema
 
-from ....utils import check_match_failure as _check_match_failure
-from ....utils import check_run_with_temperature
-from ....utils import generate_and_check as _generate_and_check
+from ....utils import check_match_failure as _check_match_failure, check_run_with_temperature, generate_and_check as _generate_and_check
+
+from jsonschema import validate
+
+
+import json
+from functools import partial
+from json import dumps as json_dumps, loads as json_loads
 
 
 def generate_and_check(
-    target_obj: Any,
-    schema_obj: Union[str, JSONSchema],
-    desired_temperature: Optional[float] = None,
+    target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None
 ):
     if isinstance(schema_obj, str):
         schema_obj = json_loads(schema_obj)
@@ -30,7 +32,9 @@ def generate_and_check(
     # Now test that the grammar can recognize and generate prepared_json
     # We partial in the grammar_callable
     if desired_temperature is not None:
-        grammar_callable = partial(gen_json, schema=schema_obj, temperature=desired_temperature)
+        grammar_callable = partial(
+            gen_json, schema=schema_obj, temperature=desired_temperature
+        )
     else:
         grammar_callable = partial(gen_json, schema=schema_obj)
 
@@ -57,4 +61,4 @@ def check_match_failure(
         failure_byte=failure_byte,
         allowed_bytes=allowed_bytes,
         grammar=grammar,
-    )
+    )
\ No newline at end of file