From 17e0140fe147690ecb1c81dbb89dabc8efc6e94e Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 29 Oct 2024 18:49:19 -0700 Subject: [PATCH 01/70] refactor oneOf, allOf --- guidance/library/_json.py | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 0ca544726..fae9d62d7 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -722,6 +722,29 @@ def anyOf( options = [self.json(json_schema=item) for item in anyof_list] return lm + select(options) + @guidance(stateless=True) + def oneOf( + self, + lm, + *, + oneof_list: Sequence[JSONSchema], + ): + if len(oneof_list) == 1: + return lm + self.json(json_schema=oneof_list[0]) + warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") + return lm + self.anyOf(anyof_list=oneof_list) + + @guidance(stateless=True) + def allOf( + self, + lm, + *, + allof_list: Sequence[JSONSchema], + ): + if len(allof_list) != 1: + raise ValueError("Only support allOf with exactly one item") + return lm + self.json(json_schema=allof_list[0]) + @guidance(stateless=True) def const( self, @@ -842,20 +865,13 @@ def json( sibling_keys = get_sibling_keys(json_schema, Keyword.ALLOF) if sibling_keys: raise NotImplementedError(f"allOf with sibling keys is not yet supported. Got {sibling_keys}") - allof_list = json_schema[Keyword.ALLOF] - if len(allof_list) != 1: - raise ValueError("Only support allOf with exactly one item") - return lm + self.json(json_schema=allof_list[0]) + return lm + self.allOf(allof_list=json_schema[Keyword.ALLOF]) if Keyword.ONEOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF) if sibling_keys: raise NotImplementedError(f"oneOf with sibling keys is not yet supported. 
Got {sibling_keys}") - oneof_list = json_schema[Keyword.ONEOF] - if len(oneof_list) == 1: - return lm + self.json(json_schema=oneof_list[0]) - warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") - return lm + self.anyOf(anyof_list=oneof_list) + return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF]) if Keyword.REF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.REF) From dfa5d2583428a11ec5fe9c41eb10b28dd85c9d19 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 29 Oct 2024 20:58:11 -0700 Subject: [PATCH 02/70] allOf tests from the JSON Schema test suite --- tests/unit/library/test_json_allOf.py | 285 ++++++++++++++++++++++++++ 1 file changed, 285 insertions(+) create mode 100644 tests/unit/library/test_json_allOf.py diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py new file mode 100644 index 000000000..6ef14aee3 --- /dev/null +++ b/tests/unit/library/test_json_allOf.py @@ -0,0 +1,285 @@ +from json import dumps as json_dumps + +import pytest +from jsonschema import ValidationError, validate + +from .test_json import check_match_failure, generate_and_check + + +class TestDynamicRefs: + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # allOf + ({"foo": "baz", "bar": 2}, True), + # mismatch second + ({"foo": "baz"}, False), + # mismatch first + ({"bar": 2}, False), + # wrong type + ({"foo": "baz", "bar": "quux"}, False), + ], + ) + def test_allOf(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [ + {"properties": {"bar": {"type": "integer"}}, "required": ["bar"]}, + {"properties": {"foo": {"type": "string"}}, "required": ["foo"]}, + ], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # valid + ({"foo": "quux", "bar": 2, "baz": None}, True), + # mismatch base schema + ({"foo": "quux", "baz": None}, False), + # mismatch first allOf + ({"bar": 2, "baz": None}, False), + # mismatch second allOf + ({"foo": "quux", "bar": 2}, False), + # mismatch both + ({"bar": 2}, False), + ], + ) + def test_allOf_with_base_schema(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"bar": {"type": "integer"}}, + "required": ["bar"], + "allOf": [ + {"properties": {"foo": {"type": "string"}}, "required": ["foo"]}, + {"properties": {"baz": {"type": "null"}}, "required": ["baz"]}, + ], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # valid + (25, True), + # mismatch one + (35, False), + ], + ) + def test_allOf_simple_types(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"maximum": 30}, {"minimum": 20}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any value is valid + ("foo", True) + ], + ) + def test_allOf_with_boolean_schemas_all_true(self, test_object, valid): + schema = {"$schema": "https://json-schema.org/draft/2020-12/schema", "allOf": [True, True]} + if valid: + validate(instance=test_object, schema=schema) + 
generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any value is invalid + ("foo", False) + ], + ) + def test_allOf_with_boolean_schemas_some_false(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [True, False], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any value is invalid + ("foo", False) + ], + ) + def test_allOf_with_boolean_schemas_all_false(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [False, False], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any data is valid + (1, True) + ], + ) + def test_allOf_with_one_empty_schema(self, test_object, valid): + schema = {"$schema": "https://json-schema.org/draft/2020-12/schema", "allOf": [{}]} + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any data is valid + (1, True) + ], + ) + def 
test_allOf_with_two_empty_schemas(self, test_object, valid): + schema = {"$schema": "https://json-schema.org/draft/2020-12/schema", "allOf": [{}, {}]} + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # string is invalid + ("foo", False), + ], + ) + def test_allOf_with_the_first_empty_schema(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{}, {"type": "number"}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # string is invalid + ("foo", False), + ], + ) + def test_allOf_with_the_last_empty_schema(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"type": "number"}, {}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # null is valid + (None, True), + # anything non-null is invalid + (123, False), + ], + ) + def test_nested_allOf_to_check_validation_semantics(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"allOf": [{"type": "null"}]}], + } + if 
valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # allOf: false, anyOf: false, oneOf: false + (1, False), + # allOf: false, anyOf: false, oneOf: true + (5, False), + # allOf: false, anyOf: true, oneOf: false + (3, False), + # allOf: false, anyOf: true, oneOf: true + (15, False), + # allOf: true, anyOf: false, oneOf: false + (2, False), + # allOf: true, anyOf: false, oneOf: true + (10, False), + # allOf: true, anyOf: true, oneOf: false + (6, False), + # allOf: true, anyOf: true, oneOf: true + (30, True), + ], + ) + def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"multipleOf": 2}], + "anyOf": [{"multipleOf": 3}], + "oneOf": [{"multipleOf": 5}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) From b505f1d8a8433e6bcd9ca80515c121ebab5e1647 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 29 Oct 2024 20:59:25 -0700 Subject: [PATCH 03/70] prototype allOf --- guidance/library/_json.py | 148 +++++++++++++++++++++++++++++++++++--- 1 file changed, 138 insertions(+), 10 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index fae9d62d7..9974584d9 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -739,11 +739,91 @@ def allOf( self, lm, *, - allof_list: Sequence[JSONSchema], + parent_schema: JSONSchema, ): - if len(allof_list) != 1: - raise ValueError("Only support allOf with exactly one item") - return lm + 
self.json(json_schema=allof_list[0]) + type = set(JSONType) + properties = {} + required = set() + additional_properties_list = [] + other_data = {} + + def handle_keyword(key: str, value: Any): + nonlocal type + nonlocal required + + if key == Keyword.REF: + raise NotImplementedError("allOf with $ref is not yet supported") + + elif key == Keyword.TYPE: + # TODO: Need to handle type-narrowing correctly: if we have a "number" and an "integer", we should only keep "integer". + # For now, we'll just intersect the types. + value = cast(Union[str, Sequence[str]], value) + if isinstance(value, str): + type = {value} + else: + type &= set(value) + # Throw an error early if we have conflicting types + if not type: + raise ValueError("allOf with conflicting types") + + elif key == Keyword.ALLOF: + value = cast(Sequence[JSONSchema], value) + for schema in value: + add_schema(schema) + + elif key == ObjectKeywords.PROPERTIES: + value = cast(Mapping[str, JSONSchema], value) + for name, schema in value.items(): + if name in properties: + # Will be recursively merged later + properties[name] = {"allOf": [properties[name], schema]} + else: + properties[name] = schema + + elif key == ObjectKeywords.REQUIRED: + value = cast(Sequence[str], value) + required |= set(value) + + elif key == ObjectKeywords.ADDITIONAL_PROPERTIES: + value = cast(JSONSchema, value) + additional_properties_list.append(value) + + elif key in set(Keyword): + # If we've done our job right, we should never hit this case... 
+ raise NotImplementedError(f"Don't yet know how to handle {key} in allOf") + + elif key in other_data: + raise NotImplementedError(f"Don't yet know how to reduce multiple values of {key!r} in allOf") + + else: + other_data[key] = value + + def add_schema(schema: JSONSchema): + nonlocal type + if schema is True: + return + if schema is False: + raise ValueError("allOf contains a False schema") + for key, value in schema.items(): + if key in IGNORED_KEYS: + continue + handle_keyword(key, value) + + add_schema(parent_schema) + + combined_schema = { + Keyword.TYPE: type, + **other_data + } + if properties: + combined_schema[ObjectKeywords.PROPERTIES] = properties + if required: + combined_schema[ObjectKeywords.REQUIRED] = required + if additional_properties_list: + combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": additional_properties_list} + + return lm + self.json(json_schema=combined_schema) + @guidance(stateless=True) def const( @@ -855,18 +935,66 @@ def json( validate_json_node_keys(json_schema) + if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: + parent_schema = json_schema.copy() + anyof_list = parent_schema.pop(Keyword.ANYOF) + allof_list = parent_schema.pop(Keyword.ALLOF) + oneof_list = parent_schema.pop(Keyword.ONEOF) + # Reduce the problem to a oneOf of anyOfs of allOfs + return lm + self.oneOf( + oneof_list=[ + {"anyOf": [ + {"allOf": [one_item, any_item, *allof_list]} + for any_item in anyof_list + ]} + for one_item in oneof_list + ] + ) + + if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema: + parent_schema = json_schema.copy() + anyof_list = parent_schema.pop(Keyword.ANYOF) + allof_list = parent_schema.pop(Keyword.ALLOF) + # Reduce the problem to an anyOf of allOfs + return lm + self.anyOf( + anyof_list=[ + {"allOf": [any_item, *allof_list]} + for any_item in anyof_list + ] + ) + + if Keyword.ALLOF in json_schema and Keyword.ONEOF in json_schema: + parent_schema = 
json_schema.copy() + oneof_list = parent_schema.pop(Keyword.ONEOF) + allof_list = parent_schema.pop(Keyword.ALLOF) + # Reduce the problem to a oneOf of allOfs + return lm + self.oneOf( + anyof_list=[ + {"allOf": [one_item, *allof_list]} + for one_item in oneof_list + ] + ) + + if Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: + parent_schema = json_schema.copy() + oneof_list = parent_schema.pop(Keyword.ONEOF) + anyof_list = parent_schema.pop(Keyword.ANYOF) + # Reduce the problem to a oneOf of anyOfs + return lm + self.oneOf( + oneof_list=[ + {"anyOf": anyof_list} + ] + ) + + if Keyword.ALLOF in json_schema: + return lm + self.allOf(parent_schema=json_schema) + if Keyword.ANYOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ANYOF) if sibling_keys: raise NotImplementedError(f"anyOf with sibling keys is not yet supported. Got {sibling_keys}") return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF]) - if Keyword.ALLOF in json_schema: - sibling_keys = get_sibling_keys(json_schema, Keyword.ALLOF) - if sibling_keys: - raise NotImplementedError(f"allOf with sibling keys is not yet supported. 
Got {sibling_keys}") - return lm + self.allOf(allof_list=json_schema[Keyword.ALLOF]) - if Keyword.ONEOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF) if sibling_keys: From c43c76c6e3776e0ba298b19258946190664830f8 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:27:12 -0700 Subject: [PATCH 04/70] fix nesting of sibling oneOf, allOf, anyOf --- guidance/library/_json.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 9974584d9..afde1d624 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -965,11 +965,11 @@ def json( if Keyword.ALLOF in json_schema and Keyword.ONEOF in json_schema: parent_schema = json_schema.copy() - oneof_list = parent_schema.pop(Keyword.ONEOF) allof_list = parent_schema.pop(Keyword.ALLOF) + oneof_list = parent_schema.pop(Keyword.ONEOF) # Reduce the problem to a oneOf of allOfs return lm + self.oneOf( - anyof_list=[ + oneof_list=[ {"allOf": [one_item, *allof_list]} for one_item in oneof_list ] @@ -977,12 +977,14 @@ def json( if Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: parent_schema = json_schema.copy() - oneof_list = parent_schema.pop(Keyword.ONEOF) anyof_list = parent_schema.pop(Keyword.ANYOF) - # Reduce the problem to a oneOf of anyOfs + oneof_list = parent_schema.pop(Keyword.ONEOF) + # Reduce the problem to a oneOf of allOfs return lm + self.oneOf( oneof_list=[ - {"anyOf": anyof_list} + {"allOf": [one_item, any_item]} + for any_item in anyof_list + for one_item in oneof_list ] ) From be2492123b32cd8b8b5bd3242be9fe1b64dc35aa Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:30:27 -0700 Subject: [PATCH 05/70] pass parent schema down to allOf --- guidance/library/_json.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index afde1d624..c4edb4e31 100644 --- 
a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -944,7 +944,7 @@ def json( return lm + self.oneOf( oneof_list=[ {"anyOf": [ - {"allOf": [one_item, any_item, *allof_list]} + {"allOf": [one_item, any_item, *allof_list], **parent_schema} for any_item in anyof_list ]} for one_item in oneof_list @@ -958,7 +958,7 @@ def json( # Reduce the problem to an anyOf of allOfs return lm + self.anyOf( anyof_list=[ - {"allOf": [any_item, *allof_list]} + {"allOf": [any_item, *allof_list], **parent_schema} for any_item in anyof_list ] ) @@ -970,7 +970,7 @@ def json( # Reduce the problem to a oneOf of allOfs return lm + self.oneOf( oneof_list=[ - {"allOf": [one_item, *allof_list]} + {"allOf": [one_item, *allof_list], **parent_schema} for one_item in oneof_list ] ) @@ -979,10 +979,11 @@ def json( parent_schema = json_schema.copy() anyof_list = parent_schema.pop(Keyword.ANYOF) oneof_list = parent_schema.pop(Keyword.ONEOF) + assert Keyword.ALLOF not in parent_schema # Reduce the problem to a oneOf of allOfs return lm + self.oneOf( oneof_list=[ - {"allOf": [one_item, any_item]} + {"allOf": [one_item, any_item], **parent_schema} for any_item in anyof_list for one_item in oneof_list ] From f4d37897e9496545ae009a66e699cf54e8be7d99 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:35:52 -0700 Subject: [PATCH 06/70] validate node keys when recursively calling add_schema --- guidance/library/_json.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index c4edb4e31..2eeac46be 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -804,6 +804,9 @@ def add_schema(schema: JSONSchema): return if schema is False: raise ValueError("allOf contains a False schema") + # Validate the schema's keys (we have only validated the parent schema's keys so far) + # TODO: This will make us validate the parent twice... 
should probably be refactored + validate_json_node_keys(schema) for key, value in schema.items(): if key in IGNORED_KEYS: continue From 1102bc291515e9b103447df7a12b86b5fe5db0b0 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:36:18 -0700 Subject: [PATCH 07/70] no longer nonlocal --- guidance/library/_json.py | 1 - 1 file changed, 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 2eeac46be..34f4f1cc5 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -799,7 +799,6 @@ def handle_keyword(key: str, value: Any): other_data[key] = value def add_schema(schema: JSONSchema): - nonlocal type if schema is True: return if schema is False: From b7043bae4373f8ddca0f6296fc2a61a662f62b51 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:53:31 -0700 Subject: [PATCH 08/70] punt to allOf for handling sibling keys --- guidance/library/_json.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 34f4f1cc5..30ddcef47 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -996,21 +996,42 @@ def json( if Keyword.ANYOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ANYOF) - if sibling_keys: - raise NotImplementedError(f"anyOf with sibling keys is not yet supported. 
Got {sibling_keys}") - return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF]) + if not sibling_keys: + return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF]) + # Let the allOf function handle anyOfs with sibling keys + parent_schema = json_schema.copy() + anyof_list = parent_schema.pop(Keyword.ANYOF) + return lm + self.anyOf( + anyof_list=[ + {"allOf": [any_item], **parent_schema} + for any_item in anyof_list + ] + ) if Keyword.ONEOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF) - if sibling_keys: - raise NotImplementedError(f"oneOf with sibling keys is not yet supported. Got {sibling_keys}") - return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF]) + if not sibling_keys: + return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF]) + # Let the allOf function handle oneOfs with sibling keys + parent_schema = json_schema.copy() + oneof_list = parent_schema.pop(Keyword.ONEOF) + assert Keyword.ALLOF not in parent_schema + return lm + self.oneOf( + oneof_list=[ + {"allOf": [one_item], **parent_schema} + for one_item in oneof_list + ] + ) if Keyword.REF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.REF) - if sibling_keys: - raise NotImplementedError(f"$ref with sibling keys is not yet supported. 
Got {sibling_keys}") - return lm + self.ref(reference=json_schema[Keyword.REF]) + if not sibling_keys: + return lm + self.ref(reference=json_schema[Keyword.REF]) + # Let the allOf function handle refs with sibling keys + parent_schema = json_schema.copy() + ref = parent_schema.pop(Keyword.REF) + assert Keyword.ALLOF not in parent_schema + return lm + self.allOf(parent_schema={"allOf": [{Keyword.REF: ref}], **parent_schema}) if Keyword.CONST in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM} From 79b8ffb3d2ad73d33cc65d3d223fb47019cb32a9 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:54:38 -0700 Subject: [PATCH 09/70] ref in allof --- guidance/library/_json.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 30ddcef47..441986797 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -747,12 +747,23 @@ def allOf( additional_properties_list = [] other_data = {} + resolver = self._resolver.lookup(self._base_uri).resolver + def handle_keyword(key: str, value: Any): nonlocal type nonlocal required + nonlocal resolver if key == Keyword.REF: - raise NotImplementedError("allOf with $ref is not yet supported") + value = cast(str, value) + resolved = resolver.lookup(value) + # Some funky resolver scope to handle here... We have to pretend to be the original schema + # TODO: we have a totally separate REF implementation for when we have no sibling keys. Need to refactor. + # TODO: this will probably break if we have a recursive reference in an allOf + old_resolver = resolver + resolver = resolved.resolver + add_schema(resolved.contents) + resolver = old_resolver elif key == Keyword.TYPE: # TODO: Need to handle type-narrowing correctly: if we have a "number" and an "integer", we should only keep "integer". 
From 02d12be6ddb60012f7a21a3a1add526e5e8fe2e5 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 09:54:52 -0700 Subject: [PATCH 10/70] types --- guidance/library/_json.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 441986797..f1d8c8c03 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -33,7 +33,8 @@ from ._pydantic import pydantic_to_json_schema from ._subgrammar import as_regular_grammar, lexeme, subgrammar -JSONSchema = Union[bool, Mapping[str, Any]] +JSONValue = Union[None, bool, int, float, str, Mapping[str, "JSONValue"], Sequence["JSONValue"]] +JSONSchema = Union[bool, Mapping[str, JSONValue]] DRAFT202012_RESERVED_KEYWORDS = { # Anchors and References @@ -749,7 +750,7 @@ def allOf( resolver = self._resolver.lookup(self._base_uri).resolver - def handle_keyword(key: str, value: Any): + def handle_keyword(key: str, value: JSONValue): nonlocal type nonlocal required nonlocal resolver From e2518220097af338561b9082801ab5fd09e12b6d Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 10:14:08 -0700 Subject: [PATCH 11/70] type narrowing --- guidance/library/_json.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index f1d8c8c03..c95de7249 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -771,9 +771,13 @@ def handle_keyword(key: str, value: JSONValue): # For now, we'll just intersect the types. 
value = cast(Union[str, Sequence[str]], value) if isinstance(value, str): - type = {value} + value_set = {value} else: - type &= set(value) + value_set = set(value) + if JSONType.NUMBER in value_set: + # Number implies integer + value_set.add(JSONType.INTEGER) + type &= value_set # Throw an error early if we have conflicting types if not type: raise ValueError("allOf with conflicting types") From 558a2369d7eb09952b9933f7c15551a97918c33d Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 10:17:00 -0700 Subject: [PATCH 12/70] fix test that was supposed to fail under old logic but now passes --- tests/unit/library/test_json.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index d320fdb4f..4448a12bd 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -2240,24 +2240,12 @@ def test_allOf_ref(self): generate_and_check(target_obj, schema_obj) def test_allOf_bad_schema(self): - schema = """{ - "allOf" : [{ "type": "integer" }, { "type": "number" }] + schema = { + "allOf" : [{ "type": "integer" }, { "type": "string" }] } - """ - # First sanity check what we're setting up - schema_obj = json.loads(schema) - - TARGET_VALUE = 20 - validate(instance=TARGET_VALUE, schema=schema_obj) - - prepared_string = f"{json_dumps(TARGET_VALUE)}" - lm = models.Mock(prepared_string.encode()) - - # Run with the mock model - CAPTURE_KEY = "my_capture" with pytest.raises(ValueError) as ve: - lm += gen_json(name=CAPTURE_KEY, schema=schema_obj) - assert ve.value.args[0] == "Only support allOf with exactly one item" + _ = gen_json(schema=schema) + assert ve.value.args[0] == "allOf with conflicting types" class TestOneOf: @pytest.mark.parametrize("target_obj", [123, 42]) From 7296061476e7db2622dbf736a287321a8dff3adf Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 10:18:49 -0700 Subject: [PATCH 13/70] un xfail ref with 
siblings --- tests/unit/library/test_json.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index 4448a12bd..f1e38f6b8 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -1273,7 +1273,6 @@ def test_nested_refs(self, test_object, valid): ({"foo": "string"}, False), ], ) - @pytest.mark.xfail(reason="sibling keywords to ref are not yet supported") def test_ref_applies_alongside_sibling_keywords(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", From cf6eb15e312512e3af3887e84d261a3e982347f9 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 10:34:24 -0700 Subject: [PATCH 14/70] remove xfails for siblings (still failing but for wrong reason...) --- tests/unit/library/test_json.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index f1e38f6b8..52672730c 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -1562,7 +1562,6 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( ({"foo": {"bar": "a"}, "bar": "a"}, True), ], ) - @pytest.mark.xfail(reason="refs with sibling keywords are not yet supported") def test_refs_with_relative_uris_and_defs(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", @@ -1595,7 +1594,6 @@ def test_refs_with_relative_uris_and_defs(self, test_object, valid): ({"foo": {"bar": "a"}, "bar": "a"}, True), ], ) - @pytest.mark.xfail(reason="refs with sibling keywords are not yet supported") def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", From 5c1df52b378015a4dc605a418b7be3e7f6f0829a Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 12:13:01 -0700 Subject: [PATCH 15/70] drop todo --- guidance/library/_json.py | 2 
-- 1 file changed, 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index c95de7249..b7fe8b86a 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -767,8 +767,6 @@ def handle_keyword(key: str, value: JSONValue): resolver = old_resolver elif key == Keyword.TYPE: - # TODO: Need to handle type-narrowing correctly: if we have a "number" and an "integer", we should only keep "integer". - # For now, we'll just intersect the types. value = cast(Union[str, Sequence[str]], value) if isinstance(value, str): value_set = {value} From 19aff29ab1ccec7039bf5477b0ca4ebb0d4a298e Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 12:27:44 -0700 Subject: [PATCH 16/70] items --- guidance/library/_json.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index b7fe8b86a..40c08e2db 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -743,10 +743,11 @@ def allOf( parent_schema: JSONSchema, ): type = set(JSONType) - properties = {} - required = set() - additional_properties_list = [] - other_data = {} + properties: dict[str, JSONSchema] = {} + required: set[str] = set() + additional_properties_list: list[JSONSchema] = [] + items_list: list[JSONSchema] = [] + other_data: dict[str, JSONValue] = {} resolver = self._resolver.lookup(self._base_uri).resolver @@ -799,9 +800,15 @@ def handle_keyword(key: str, value: JSONValue): required |= set(value) elif key == ObjectKeywords.ADDITIONAL_PROPERTIES: + # TODO: do the additionalProperties of one schema need to evaluate against the properties of another? + # TODO: unevaluatedProperties? value = cast(JSONSchema, value) additional_properties_list.append(value) + elif key == ArrayKeywords.ITEMS: + value = cast(JSONSchema, value) + items_list.append(value) + elif key in set(Keyword): # If we've done our job right, we should never hit this case... 
raise NotImplementedError(f"Don't yet know how to handle {key} in allOf") @@ -837,6 +844,8 @@ def add_schema(schema: JSONSchema): combined_schema[ObjectKeywords.REQUIRED] = required if additional_properties_list: combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": additional_properties_list} + if items_list: + combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} return lm + self.json(json_schema=combined_schema) From fa8617811941cf209ffe0754f8523bcb3d3dc8a2 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 12:29:11 -0700 Subject: [PATCH 17/70] safer update --- guidance/library/_json.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 40c08e2db..912d8e214 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -836,7 +836,6 @@ def add_schema(schema: JSONSchema): combined_schema = { Keyword.TYPE: type, - **other_data } if properties: combined_schema[ObjectKeywords.PROPERTIES] = properties @@ -847,6 +846,9 @@ def add_schema(schema: JSONSchema): if items_list: combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} + assert not set(combined_schema) & set(other_data) + combined_schema.update(other_data) + return lm + self.json(json_schema=combined_schema) From 70d27ec78c395faf17628f1b1369c99f7d5eaa35 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 13:30:00 -0700 Subject: [PATCH 18/70] defaultdict for allOf properties --- guidance/library/_json.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 912d8e214..d16cc8885 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -16,6 +16,7 @@ import warnings import referencing import contextlib +from collections import defaultdict from urllib.parse import urljoin try: @@ -743,7 +744,7 @@ def allOf( parent_schema: JSONSchema, ): type = set(JSONType) - 
properties: dict[str, JSONSchema] = {} + properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) required: set[str] = set() additional_properties_list: list[JSONSchema] = [] items_list: list[JSONSchema] = [] @@ -789,11 +790,7 @@ def handle_keyword(key: str, value: JSONValue): elif key == ObjectKeywords.PROPERTIES: value = cast(Mapping[str, JSONSchema], value) for name, schema in value.items(): - if name in properties: - # Will be recursively merged later - properties[name] = {"allOf": [properties[name], schema]} - else: - properties[name] = schema + properties[name].append(schema) elif key == ObjectKeywords.REQUIRED: value = cast(Sequence[str], value) @@ -838,7 +835,9 @@ def add_schema(schema: JSONSchema): Keyword.TYPE: type, } if properties: - combined_schema[ObjectKeywords.PROPERTIES] = properties + combined_schema[ObjectKeywords.PROPERTIES] = {} + for name, schemas in properties.items(): + combined_schema[ObjectKeywords.PROPERTIES][name] = {"allOf": schemas} if required: combined_schema[ObjectKeywords.REQUIRED] = required if additional_properties_list: From 0d5b053c883ed080d4fece9048884a4f6d8b24e1 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 15:58:40 -0700 Subject: [PATCH 19/70] flatten allOfs when possible --- guidance/library/_json.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index d16cc8885..790e9734b 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -837,13 +837,22 @@ def add_schema(schema: JSONSchema): if properties: combined_schema[ObjectKeywords.PROPERTIES] = {} for name, schemas in properties.items(): - combined_schema[ObjectKeywords.PROPERTIES][name] = {"allOf": schemas} + if len(schemas) == 1: + combined_schema[ObjectKeywords.PROPERTIES][name] = schemas[0] + else: + combined_schema[ObjectKeywords.PROPERTIES][name] = {"allOf": schemas} if required: combined_schema[ObjectKeywords.REQUIRED] = required if 
additional_properties_list: - combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": additional_properties_list} + if len(additional_properties_list) == 1: + combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = additional_properties_list[0] + else: + combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": additional_properties_list} if items_list: - combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} + if len(items_list) == 1: + combined_schema[ArrayKeywords.ITEMS] = items_list[0] + else: + combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} assert not set(combined_schema) & set(other_data) combined_schema.update(other_data) From f3be1ecdd61db65f23c520ee290bf582fe881bde Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 16:11:35 -0700 Subject: [PATCH 20/70] pass around base uri rather than context manager for finer grained control --- guidance/library/_json.py | 163 ++++++++++++++++++++------------------ 1 file changed, 84 insertions(+), 79 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 790e9734b..c70a8ac8b 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -15,9 +15,14 @@ ) import warnings import referencing -import contextlib from collections import defaultdict -from urllib.parse import urljoin +import urllib.parse + +def urijoin(base: str, uri: str) -> str: + # Special case for fragment-only URIs + if uri.startswith("#"): + return f"{base}{uri}" + return urllib.parse.urljoin(base, uri) try: import jsonschema @@ -131,6 +136,7 @@ class Keyword(str, Enum): ANYOF = "anyOf" ALLOF = "allOf" # Note: Partial support. Only supports exactly one item. ONEOF = "oneOf" # Note: Partial support. This is converted to anyOf. 
+ ID = "$id" REF = "$ref" CONST = "const" ENUM = "enum" @@ -171,7 +177,6 @@ class ObjectKeywords(str, Enum): "$anchor", "$defs", "$schema", - "$id", "id", "$comment", "title", @@ -430,6 +435,7 @@ def ref( lm, *, reference: str, + base_uri: str, ): """ Resolve a reference to another schema and return the grammar for that schema. @@ -438,53 +444,24 @@ def ref( add it to the _defs cache. This allows us to avoid re-resolving the reference every time and to handle recursive references correctly. """ - abspath = self._get_abspath(reference) + abspath = urijoin(base_uri, reference) + if abspath not in self._defs: resolved = self._resolver.lookup(abspath) base_uri_of_resolved = resolved.resolver._base_uri @guidance(stateless=True, dedent=False, cache=True) def closure(lm): - with self._base_uri_context(base_uri_of_resolved): - grammar = self.json(json_schema=resolved.contents) + grammar = self.json(json_schema=resolved.contents, base_uri=base_uri_of_resolved) return lm + grammar self._defs[abspath] = closure return lm + self._defs[abspath]() - def _get_abspath(self, ref): - """ - Convert a reference to an absolute path, resolving it against the base URI if necessary. - This will allow us to get a unique key for each reference and hit the _defs cache correctly. - """ - if ref.startswith("#"): - # Special case for fragment-only references: - # for certain schemes (e.g. urn), urljoin may throw the base URI, but we need to keep them around - return f"{self._base_uri}{ref}" - return urljoin(self._base_uri, ref) - - - @contextlib.contextmanager - def _base_uri_context(self, base_uri: str): - """ - Temporarily replace the base_uri for the duration of the context manager. - This allows refs with different base URIs to be resolved correctly without passing the resolver around. - - Note: very much not thread-safe, but I don't expect instances of this class to be shared between threads. 
- TODO: ensure that the instance's hash depends on the base_uri before adding more caching to this class. - """ - old_base_uri = self._base_uri - self._base_uri = base_uri - try: - yield - finally: - self._base_uri = old_base_uri - - @guidance(stateless=True) def root(self, lm): - return lm + self.json(json_schema=self.schema) + return lm + self.json(json_schema=self.schema, base_uri=self._base_uri) @classmethod @@ -568,6 +545,7 @@ def object( properties: Mapping[str, JSONSchema], additional_properties: JSONSchema, required: Sequence[str], + base_uri: str, ): # "required" keys will be validated against "properties" if they're present, otherwise against "additionalProperties". # If "additionalProperties" is False, then required keys must be in "properties". @@ -588,7 +566,7 @@ def object( # Identify if the key is required required_items.append(name in required) # Build the grammar we'll use for this property - grammars.append(f'{key}{self.key_separator}' + self.json(json_schema=properties.get(name, additional_properties))) + grammars.append(f'{key}{self.key_separator}' + self.json(json_schema=properties.get(name, additional_properties), base_uri=base_uri)) if additional_properties is not False: # Key for additionalProperties is a json string, but we need to disallow any properties that are already defined @@ -604,7 +582,7 @@ def object( else: additional_key_grammar = self.string() - additional_item_grammar = additional_key_grammar + self.key_separator + self.json(json_schema=additional_properties) + additional_item_grammar = additional_key_grammar + self.key_separator + self.json(json_schema=additional_properties, base_uri=base_uri) additional_items_grammar = sequence(additional_item_grammar + self.item_separator) + additional_item_grammar grammars.append(additional_items_grammar) required_items.append(False) @@ -651,6 +629,7 @@ def array( item_schema: JSONSchema, min_items: int, max_items: Optional[int], + base_uri: str, ): if len(prefix_items_schema) < min_items 
and item_schema is False: raise ValueError( @@ -675,7 +654,7 @@ def array( assert i >= min_items break - item = self.json(json_schema=schema) + item = self.json(json_schema=schema, base_uri=base_uri) if i < min_items: required_items.append(item) @@ -684,7 +663,7 @@ def array( if max_items is None and item_schema is not False: # Add an infinite tail of items - item = self.json(json_schema=item_schema) + item = self.json(json_schema=item_schema, base_uri=base_uri) optional_items.append(item + sequence(self.item_separator + item)) lm += "[" @@ -720,8 +699,9 @@ def anyOf( lm, *, anyof_list: Sequence[JSONSchema], + base_uri: str, ): - options = [self.json(json_schema=item) for item in anyof_list] + options = [self.json(json_schema=item, base_uri=base_uri) for item in anyof_list] return lm + select(options) @guidance(stateless=True) @@ -730,11 +710,12 @@ def oneOf( lm, *, oneof_list: Sequence[JSONSchema], + base_uri: str, ): if len(oneof_list) == 1: - return lm + self.json(json_schema=oneof_list[0]) + return lm + self.json(json_schema=oneof_list[0], base_uri=base_uri) warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") - return lm + self.anyOf(anyof_list=oneof_list) + return lm + self.anyOf(anyof_list=oneof_list, base_uri=base_uri) @guidance(stateless=True) def allOf( @@ -742,6 +723,7 @@ def allOf( lm, *, parent_schema: JSONSchema, + base_uri: str, ): type = set(JSONType) properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) @@ -750,23 +732,15 @@ def allOf( items_list: list[JSONSchema] = [] other_data: dict[str, JSONValue] = {} - resolver = self._resolver.lookup(self._base_uri).resolver - - def handle_keyword(key: str, value: JSONValue): + def handle_keyword(key: str, value: JSONValue, base_uri: str): nonlocal type nonlocal required - nonlocal resolver if key == Keyword.REF: - value = cast(str, value) - resolved = resolver.lookup(value) - # Some funky resolver scope to handle here... 
We have to pretend to be the original schema - # TODO: we have a totally separate REF implementation for when we have no sibling keys. Need to refactor. - # TODO: this will probably break if we have a recursive reference in an allOf - old_resolver = resolver - resolver = resolved.resolver - add_schema(resolved.contents) - resolver = old_resolver + ref = cast(str, value) + abspath = urijoin(base_uri, ref) + resolved = self._resolver.lookup(abspath) + add_schema(resolved.contents, base_uri=resolved.resolver._base_uri) elif key == Keyword.TYPE: value = cast(Union[str, Sequence[str]], value) @@ -785,11 +759,16 @@ def handle_keyword(key: str, value: JSONValue): elif key == Keyword.ALLOF: value = cast(Sequence[JSONSchema], value) for schema in value: - add_schema(schema) + add_schema(schema, base_uri) elif key == ObjectKeywords.PROPERTIES: value = cast(Mapping[str, JSONSchema], value) for name, schema in value.items(): + this_base_uri = schema.get(Keyword.ID, base_uri) + if Keyword.REF in schema: + # Make the ref absolute so that it can be resolved in the right scope later + schema = schema.copy() + schema[Keyword.REF] = urijoin(this_base_uri, schema[Keyword.REF]) properties[name].append(schema) elif key == ObjectKeywords.REQUIRED: @@ -816,20 +795,28 @@ def handle_keyword(key: str, value: JSONValue): else: other_data[key] = value - def add_schema(schema: JSONSchema): + def add_schema(schema: JSONSchema, base_uri: str): if schema is True: return if schema is False: raise ValueError("allOf contains a False schema") + # Validate the schema's keys (we have only validated the parent schema's keys so far) # TODO: This will make us validate the parent twice... should probably be refactored validate_json_node_keys(schema) + + # Set the base_uri for this schema + if Keyword.ID in schema: + # TODO: avoid copies if possible..? 
+ schema = schema.copy() + base_uri = urijoin(base_uri, schema.pop(Keyword.ID)) + for key, value in schema.items(): if key in IGNORED_KEYS: continue - handle_keyword(key, value) + handle_keyword(key, value, base_uri) - add_schema(parent_schema) + add_schema(parent_schema, base_uri) combined_schema = { Keyword.TYPE: type, @@ -857,7 +844,7 @@ def add_schema(schema: JSONSchema): assert not set(combined_schema) & set(other_data) combined_schema.update(other_data) - return lm + self.json(json_schema=combined_schema) + return lm + self.json(json_schema=combined_schema, base_uri=base_uri) @guidance(stateless=True) @@ -893,7 +880,8 @@ def const( "properties": {k: {"const": v} for k, v in dict(value).items()}, "required": list(value.keys()), "additionalProperties": False, - } + }, + base_uri="", # dummy value -- we don't need to resolve anything ) if isinstance(value, Sequence): return lm + self.json( @@ -903,7 +891,8 @@ def const( "minItems": len(value), "maxItems": len(value), "items": False, - } + }, + base_uri="", # dummy value -- we don't need to resolve anything ) raise TypeError(f"Unsupported value type: {type(value)} for value: {value!r}") @@ -931,23 +920,26 @@ def enum( def any(self, lm): return lm + select( [ - self.json(json_schema={"type": "null"}), - self.json(json_schema={"type": "boolean"}), - self.json(json_schema={"type": "integer"}), - self.json(json_schema={"type": "number"}), - self.json(json_schema={"type": "string"}), + # Dummy base uris ok since we're not resolving anything + self.json(json_schema={"type": "null"}, base_uri=""), + self.json(json_schema={"type": "boolean"}, base_uri=""), + self.json(json_schema={"type": "integer"}, base_uri=""), + self.json(json_schema={"type": "number"}, base_uri=""), + self.json(json_schema={"type": "string"}, base_uri=""), # Recursive cases self.json( json_schema={ "type": "array", "items": True, }, + base_uri="", ), self.json( json_schema={ "type": "object", "additionalProperties": True, }, + base_uri="", ), ] ) @@ 
-959,6 +951,7 @@ def json( lm, *, json_schema: JSONSchema, + base_uri: str, ): if json_schema is True: json_schema = {} @@ -970,6 +963,10 @@ def json( validate_json_node_keys(json_schema) + if Keyword.ID in json_schema: + # "cd" into the new base_uri + base_uri = urijoin(base_uri, json_schema[Keyword.ID]) + if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: parent_schema = json_schema.copy() anyof_list = parent_schema.pop(Keyword.ANYOF) @@ -983,7 +980,8 @@ def json( for any_item in anyof_list ]} for one_item in oneof_list - ] + ], + base_uri=base_uri, ) if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema: @@ -995,7 +993,8 @@ def json( anyof_list=[ {"allOf": [any_item, *allof_list], **parent_schema} for any_item in anyof_list - ] + ], + base_uri=base_uri, ) if Keyword.ALLOF in json_schema and Keyword.ONEOF in json_schema: @@ -1007,7 +1006,8 @@ def json( oneof_list=[ {"allOf": [one_item, *allof_list], **parent_schema} for one_item in oneof_list - ] + ], + base_uri=base_uri, ) if Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: @@ -1021,16 +1021,17 @@ def json( {"allOf": [one_item, any_item], **parent_schema} for any_item in anyof_list for one_item in oneof_list - ] + ], + base_uri=base_uri, ) if Keyword.ALLOF in json_schema: - return lm + self.allOf(parent_schema=json_schema) + return lm + self.allOf(parent_schema=json_schema, base_uri=base_uri) if Keyword.ANYOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ANYOF) if not sibling_keys: - return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF]) + return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF], base_uri=base_uri) # Let the allOf function handle anyOfs with sibling keys parent_schema = json_schema.copy() anyof_list = parent_schema.pop(Keyword.ANYOF) @@ -1038,13 +1039,14 @@ def json( anyof_list=[ {"allOf": [any_item], **parent_schema} for any_item in anyof_list - ] + ], + base_uri=base_uri, ) if 
Keyword.ONEOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF) if not sibling_keys: - return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF]) + return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF], base_uri=base_uri) # Let the allOf function handle oneOfs with sibling keys parent_schema = json_schema.copy() oneof_list = parent_schema.pop(Keyword.ONEOF) @@ -1053,18 +1055,19 @@ def json( oneof_list=[ {"allOf": [one_item], **parent_schema} for one_item in oneof_list - ] + ], + base_uri=base_uri, ) if Keyword.REF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.REF) if not sibling_keys: - return lm + self.ref(reference=json_schema[Keyword.REF]) + return lm + self.ref(reference=json_schema[Keyword.REF], base_uri=base_uri) # Let the allOf function handle refs with sibling keys parent_schema = json_schema.copy() ref = parent_schema.pop(Keyword.REF) assert Keyword.ALLOF not in parent_schema - return lm + self.allOf(parent_schema={"allOf": [{Keyword.REF: ref}], **parent_schema}) + return lm + self.allOf(parent_schema={"allOf": [{Keyword.REF: ref}], **parent_schema}, base_uri=base_uri) if Keyword.CONST in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM} @@ -1139,12 +1142,14 @@ def json( item_schema=json_schema.get(ArrayKeywords.ITEMS, True), min_items=json_schema.get(ArrayKeywords.MIN_ITEMS, 0), max_items=json_schema.get(ArrayKeywords.MAX_ITEMS, None), + base_uri=base_uri, ) elif target_type == JSONType.OBJECT: option = self.object( properties=json_schema.get(ObjectKeywords.PROPERTIES, {}), additional_properties=json_schema.get(ObjectKeywords.ADDITIONAL_PROPERTIES, True), required=json_schema.get(ObjectKeywords.REQUIRED, set()), + base_uri=base_uri, ) else: raise ValueError(f"Unsupported type in schema: {target_type}") From 2732bd7c79e6bab9cf1e654b11ecf721de8f4007 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 16:33:12 -0700 Subject: [PATCH 
21/70] reorder properties in test cases to be consistent with the order we validate (arbitrary...) --- tests/unit/library/test_json.py | 12 ++++++------ tests/unit/library/test_json_allOf.py | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index 52672730c..65a915626 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -1555,11 +1555,11 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( ["test_object", "valid"], [ # invalid on inner field - ({"foo": {"bar": 1}, "bar": "a"}, False), + ({"bar": "a", "foo": {"bar": 1}}, False), # invalid on outer field - ({"foo": {"bar": "a"}, "bar": 1}, False), + ({ "bar": 1, "foo": {"bar": "a"}}, False), # valid on both fields - ({"foo": {"bar": "a"}, "bar": "a"}, True), + ({"bar": "a", "foo": {"bar": "a"}, }, True), ], ) def test_refs_with_relative_uris_and_defs(self, test_object, valid): @@ -1587,11 +1587,11 @@ def test_refs_with_relative_uris_and_defs(self, test_object, valid): ["test_object", "valid"], [ # invalid on inner field - ({"foo": {"bar": 1}, "bar": "a"}, False), + ({"bar": "a", "foo": {"bar": 1}}, False), # invalid on outer field - ({"foo": {"bar": "a"}, "bar": 1}, False), + ({"bar": 1, "foo": {"bar": "a"}}, False), # valid on both fields - ({"foo": {"bar": "a"}, "bar": "a"}, True), + ({"bar": "a", "foo": {"bar": "a"}}, True), ], ) def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 6ef14aee3..3544bd643 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -6,18 +6,18 @@ from .test_json import check_match_failure, generate_and_check -class TestDynamicRefs: +class TestAllOf: @pytest.mark.parametrize( ["test_object", "valid"], [ # allOf - ({"foo": "baz", "bar": 2}, True), + ({"bar": 2, "foo": "baz"}, True), 
# mismatch second ({"foo": "baz"}, False), # mismatch first ({"bar": 2}, False), # wrong type - ({"foo": "baz", "bar": "quux"}, False), + ({"bar": "quux", "foo": "baz"}, False), ], ) def test_allOf(self, test_object, valid): @@ -40,13 +40,13 @@ def test_allOf(self, test_object, valid): ["test_object", "valid"], [ # valid - ({"foo": "quux", "bar": 2, "baz": None}, True), + ({"bar": 2, "foo": "quux", "baz": None}, True), # mismatch base schema ({"foo": "quux", "baz": None}, False), # mismatch first allOf ({"bar": 2, "baz": None}, False), # mismatch second allOf - ({"foo": "quux", "bar": 2}, False), + ({"bar": 2, "foo": "quux"}, False), # mismatch both ({"bar": 2}, False), ], From 6bc08aed454331d615c12d51c986a86421cc95f6 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 16:36:06 -0700 Subject: [PATCH 22/70] false schemas --- tests/unit/library/test_json_allOf.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 3544bd643..64ec88d90 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -3,6 +3,7 @@ import pytest from jsonschema import ValidationError, validate +from guidance import json as gen_json from .test_json import check_match_failure, generate_and_check @@ -126,7 +127,9 @@ def test_allOf_with_boolean_schemas_some_false(self, test_object, valid): else: with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "allOf contains a False schema" @pytest.mark.parametrize( ["test_object", "valid"], @@ -146,7 +149,9 @@ def test_allOf_with_boolean_schemas_all_false(self, test_object, valid): else: with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "allOf contains a False schema" @pytest.mark.parametrize( ["test_object", "valid"], From a0b1c12839860cc839591f1f9fca88189295e80d Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 17:04:13 -0700 Subject: [PATCH 23/70] enum and const --- guidance/library/_json.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index c70a8ac8b..cc76ecc10 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -42,6 +42,11 @@ def urijoin(base: str, uri: str) -> str: JSONValue = Union[None, bool, int, float, str, Mapping[str, "JSONValue"], Sequence["JSONValue"]] JSONSchema = Union[bool, Mapping[str, JSONValue]] +class Unset(Enum): + # https://peps.python.org/pep-0484/#support-for-singleton-types-in-unions + token = 0 +_unset = Unset.token + DRAFT202012_RESERVED_KEYWORDS = { # Anchors and References '$anchor', @@ -731,10 +736,14 @@ def allOf( additional_properties_list: list[JSONSchema] = [] items_list: list[JSONSchema] = [] other_data: dict[str, JSONValue] = {} + enum: Optional[list[JSONValue]] = None + const: Union[Unset, JSONValue] = _unset def handle_keyword(key: str, value: JSONValue, base_uri: str): nonlocal type nonlocal required + nonlocal const + nonlocal enum if key == Keyword.REF: ref = cast(str, value) @@ -742,6 +751,26 @@ def handle_keyword(key: str, value: JSONValue, base_uri: str): resolved = self._resolver.lookup(abspath) add_schema(resolved.contents, base_uri=resolved.resolver._base_uri) + elif key == Keyword.CONST: + value = cast(JSONValue, value) + if const is not _unset and const != value: + raise ValueError(f"allOf with multiple conflicting const values: {const!r} and {value!r}") + const = value + + elif key == Keyword.ENUM: + value = 
cast(Sequence[JSONValue], value) + if enum is not None: + try: + enum = list(set(enum) & set(value)) + except TypeError: + # Check on equality, not on hash + # Yes, this is O(n^2). + # Hope the items were unique. + # ¯\_(ツ)_/¯ + enum = [a for a in enum if a == b for b in value] + else: + enum = value + elif key == Keyword.TYPE: value = cast(Union[str, Sequence[str]], value) if isinstance(value, str): @@ -819,7 +848,7 @@ def add_schema(schema: JSONSchema, base_uri: str): add_schema(parent_schema, base_uri) combined_schema = { - Keyword.TYPE: type, + Keyword.TYPE: list(type), } if properties: combined_schema[ObjectKeywords.PROPERTIES] = {} @@ -840,6 +869,10 @@ def add_schema(schema: JSONSchema, base_uri: str): combined_schema[ArrayKeywords.ITEMS] = items_list[0] else: combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} + if enum is not None: + combined_schema[Keyword.ENUM] = enum + if const is not _unset: + combined_schema[Keyword.CONST] = const assert not set(combined_schema) & set(other_data) combined_schema.update(other_data) From 4505171074301b7a486a1a96343f5c915155f7cf Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 17:04:54 -0700 Subject: [PATCH 24/70] modify test to use enum instead of multipleOf (which we don't have an implementation of) --- tests/unit/library/test_json_allOf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 64ec88d90..ebf8fcc25 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -277,9 +277,9 @@ def test_nested_allOf_to_check_validation_semantics(self, test_object, valid): def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", - "allOf": [{"multipleOf": 2}], - "anyOf": [{"multipleOf": 3}], - "oneOf": [{"multipleOf": 5}], + "allOf": [{"enum": [2, 6, 10, 30]}], + "anyOf": [{"enum": 
[3, 6, 15, 30]}], + "oneOf": [{"enum": [5, 10, 15, 30]}], } if valid: validate(instance=test_object, schema=schema) From 7d0b576e8316eb83a20cd4c9c63cce1390bf7939 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 17:07:50 -0700 Subject: [PATCH 25/70] remove the ternary implementation since the union of all the binary ones cover it --- guidance/library/_json.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index cc76ecc10..ecb600580 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1000,23 +1000,6 @@ def json( # "cd" into the new base_uri base_uri = urijoin(base_uri, json_schema[Keyword.ID]) - if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: - parent_schema = json_schema.copy() - anyof_list = parent_schema.pop(Keyword.ANYOF) - allof_list = parent_schema.pop(Keyword.ALLOF) - oneof_list = parent_schema.pop(Keyword.ONEOF) - # Reduce the problem to a oneOf of anyOfs of allOfs - return lm + self.oneOf( - oneof_list=[ - {"anyOf": [ - {"allOf": [one_item, any_item, *allof_list], **parent_schema} - for any_item in anyof_list - ]} - for one_item in oneof_list - ], - base_uri=base_uri, - ) - if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema: parent_schema = json_schema.copy() anyof_list = parent_schema.pop(Keyword.ANYOF) From c5ed6b3f185d302415dfb28b57705b6086a6d259 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 30 Oct 2024 17:21:52 -0700 Subject: [PATCH 26/70] make mypy less sad --- guidance/library/_json.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index ecb600580..a270d23dd 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -39,8 +39,7 @@ def urijoin(base: str, uri: str) -> str: from ._pydantic import pydantic_to_json_schema from ._subgrammar import 
as_regular_grammar, lexeme, subgrammar -JSONValue = Union[None, bool, int, float, str, Mapping[str, "JSONValue"], Sequence["JSONValue"]] -JSONSchema = Union[bool, Mapping[str, JSONValue]] +JSONSchema = Union[bool, dict[str, Any]] class Unset(Enum): # https://peps.python.org/pep-0484/#support-for-singleton-types-in-unions @@ -735,11 +734,11 @@ def allOf( required: set[str] = set() additional_properties_list: list[JSONSchema] = [] items_list: list[JSONSchema] = [] - other_data: dict[str, JSONValue] = {} - enum: Optional[list[JSONValue]] = None - const: Union[Unset, JSONValue] = _unset + other_data: dict[str, Any] = {} + enum: Optional[list[Any]] = None + const: Union[Unset, Any] = _unset - def handle_keyword(key: str, value: JSONValue, base_uri: str): + def handle_keyword(key: str, value: Any, base_uri: str): nonlocal type nonlocal required nonlocal const @@ -752,13 +751,12 @@ def handle_keyword(key: str, value: JSONValue, base_uri: str): add_schema(resolved.contents, base_uri=resolved.resolver._base_uri) elif key == Keyword.CONST: - value = cast(JSONValue, value) if const is not _unset and const != value: raise ValueError(f"allOf with multiple conflicting const values: {const!r} and {value!r}") const = value elif key == Keyword.ENUM: - value = cast(Sequence[JSONValue], value) + value = cast(list[Any], value) if enum is not None: try: enum = list(set(enum) & set(value)) @@ -767,12 +765,12 @@ def handle_keyword(key: str, value: JSONValue, base_uri: str): # Yes, this is O(n^2). # Hope the items were unique. 
# ¯\_(ツ)_/¯ - enum = [a for a in enum if a == b for b in value] + enum = [a for a in enum for b in value if a == b] else: enum = value elif key == Keyword.TYPE: - value = cast(Union[str, Sequence[str]], value) + value = cast(Union[str, list[str]], value) if isinstance(value, str): value_set = {value} else: @@ -791,13 +789,14 @@ def handle_keyword(key: str, value: JSONValue, base_uri: str): add_schema(schema, base_uri) elif key == ObjectKeywords.PROPERTIES: - value = cast(Mapping[str, JSONSchema], value) + value = cast(dict[str, JSONSchema], value) for name, schema in value.items(): - this_base_uri = schema.get(Keyword.ID, base_uri) - if Keyword.REF in schema: - # Make the ref absolute so that it can be resolved in the right scope later - schema = schema.copy() - schema[Keyword.REF] = urijoin(this_base_uri, schema[Keyword.REF]) + if isinstance(schema, dict): + this_base_uri = schema.get(Keyword.ID, base_uri) + if Keyword.REF in schema: + # Make the ref absolute so that it can be resolved in the right scope later + schema = schema.copy() + schema[Keyword.REF] = urijoin(this_base_uri, schema[Keyword.REF]) properties[name].append(schema) elif key == ObjectKeywords.REQUIRED: @@ -847,7 +846,7 @@ def add_schema(schema: JSONSchema, base_uri: str): add_schema(parent_schema, base_uri) - combined_schema = { + combined_schema: dict[str, Any] = { Keyword.TYPE: list(type), } if properties: From a8cdd2a26607c9a9f40fd44167dad9d826a7c1cf Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 12:47:34 -0700 Subject: [PATCH 27/70] apply additionalProperties correctly in allOf --- guidance/library/_json.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index a270d23dd..26f916109 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -732,13 +732,13 @@ def allOf( type = set(JSONType) properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) 
required: set[str] = set() - additional_properties_list: list[JSONSchema] = [] + additional_properties_list: list[tuple[JSONSchema, set[str]]] = [] items_list: list[JSONSchema] = [] other_data: dict[str, Any] = {} enum: Optional[list[Any]] = None const: Union[Unset, Any] = _unset - def handle_keyword(key: str, value: Any, base_uri: str): + def handle_keyword(key: str, value: Any, parent_schema: JSONSchema, base_uri: str): nonlocal type nonlocal required nonlocal const @@ -804,10 +804,16 @@ def handle_keyword(key: str, value: Any, base_uri: str): required |= set(value) elif key == ObjectKeywords.ADDITIONAL_PROPERTIES: - # TODO: do the additionalProperties of one schema need to evaluate against the properties of another? # TODO: unevaluatedProperties? value = cast(JSONSchema, value) - additional_properties_list.append(value) + # We need to keep track of which properties are exempt from this additionalProperties schema, + # i.e. the ones defined in the parent schema + exempt_properties: set[str] = set() + if ObjectKeywords.PROPERTIES in parent_schema: + exempt_properties = set(parent_schema[ObjectKeywords.PROPERTIES]) + additional_properties_list.append( + (value, exempt_properties) + ) elif key == ArrayKeywords.ITEMS: value = cast(JSONSchema, value) @@ -842,13 +848,20 @@ def add_schema(schema: JSONSchema, base_uri: str): for key, value in schema.items(): if key in IGNORED_KEYS: continue - handle_keyword(key, value, base_uri) + handle_keyword(key, value, schema, base_uri) add_schema(parent_schema, base_uri) combined_schema: dict[str, Any] = { Keyword.TYPE: list(type), } + + # Post-process additional_properties to make sure we apply the additional properties of one + # schema to the properties of another schema + for additional_schema, exempt_properties in additional_properties_list: + for name in set(properties) - exempt_properties: + properties[name].append(additional_schema) + if properties: combined_schema[ObjectKeywords.PROPERTIES] = {} for name, schemas in 
properties.items(): @@ -860,9 +873,9 @@ def add_schema(schema: JSONSchema, base_uri: str): combined_schema[ObjectKeywords.REQUIRED] = required if additional_properties_list: if len(additional_properties_list) == 1: - combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = additional_properties_list[0] + combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES], _ = additional_properties_list[0] else: - combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": additional_properties_list} + combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": [schema for schema, _ in additional_properties_list]} if items_list: if len(items_list) == 1: combined_schema[ArrayKeywords.ITEMS] = items_list[0] From 985d38e1a0dac4234b9a5424e16071884f2c093f Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 13:29:03 -0700 Subject: [PATCH 28/70] tests for additionalProperties in allOf --- tests/unit/library/test_json_allOf.py | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index ebf8fcc25..23b89e2ec 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -288,3 +288,60 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + "test_object, valid", + [ + # valid: foo is integer and less than 4, bar is equal to 5, baz is integer greater than 5 + ({"foo": 0, "bar": 5, "baz": 10}, True), + # valid: foo is null, bar is equal to 5, baz is null + ({"foo": None, "bar": 5, "baz": None}, True), + # valid: foo is integer and less than 4, bar is non-number, baz is integer greater than 5 + ({"foo": 0, "bar": "quxx", "baz": 10}, True), + # invalid: foo is integer and greater than 4 + ({"foo": 5, "bar": 5, "baz": 10}, False), + 
# invalid: foo is not an integer or None + ({"foo": "quxx", "bar": 5, "baz": 10}, False), + # invalid: bar is greater than 5 + ({"foo": 0, "bar": 6, "baz": 10}, False), + # invalid: bar is less than 5 + ({"foo": 0, "bar": 4, "baz": 10}, False), + # invalid: baz is less than 5 + ({"foo": 0, "bar": 5, "baz": 4}, False), + # invalid: baz is not an integer or null + ({"foo": 0, "bar": 5, "baz": "quxx"}, False), + ] + ) + @pytest.mark.parametrize( + "schema", + [ + # The following are equivalent to this: + { + "properties": {"foo": {"type": ["integer", "null"], "maximum": 4}, "bar": {"minimum": 5, "maximum": 5}}, + "additionalProperties": {"type": ["integer", "null"], "minimum": 5} + }, + # additionalProperties in parent schema + { + "allOf": [ + {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}} + ], + "properties": {"bar": {"maximum": 5}}, + "additionalProperties": {"type": ["integer", "null"]} + }, + # additionalProperties in allOf + { + "allOf": [ + {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}}, + {"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}} + ] + }, + ] + ) + def test_additionalProperties_in_allOf(self, schema, test_object, valid): + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) From 60bf51b1ffd996858d4f9cf5b308e23289b339bc Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 13:41:09 -0700 Subject: [PATCH 29/70] add (xfailed) test for inconsistent additionalProperties values in allOf --- tests/unit/library/test_json_allOf.py | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 23b89e2ec..4fd860a72 100644 --- 
a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -345,3 +345,36 @@ def test_additionalProperties_in_allOf(self, schema, test_object, valid): with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + "test_object, valid", + [ + ({}, True), # empty object is valid + ({"foo": 1}, False), # foo is not a string + ({"foo": "x"}, False), # foo is not an integer + ({"foo": True}, False), # foo is not a string or an integer + ] + ) + def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): + schema = { + "type": "object", + "allOf": [ + {"additionalProperties": {"type": "integer"}}, + {"additionalProperties": {"type": "string"}} + ] + } + try: + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + except ValueError as ve: + if ve.args[0] == "allOf with conflicting types": + pytest.xfail( + reason="We should be returning a False schema from allOf if there is a conflict, but we currently raise an error" + ) + else: + raise From dbec4593b84674b2d431a19efc2a4d0716148735 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 14:14:48 -0700 Subject: [PATCH 30/70] mypy --- guidance/library/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 26f916109..747773445 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -738,7 +738,7 @@ def allOf( enum: Optional[list[Any]] = None const: Union[Unset, Any] = _unset - def handle_keyword(key: str, value: Any, parent_schema: JSONSchema, base_uri: str): + def handle_keyword(key: str, value: Any, parent_schema: dict[str, 
Any], base_uri: str):
             nonlocal type
             nonlocal required
             nonlocal const

From 4e688bd3e2f2b0fe1a4e78831d7faf1a39a5c9b4 Mon Sep 17 00:00:00 2001
From: Hudson Cooper
Date: Thu, 31 Oct 2024 16:49:50 -0700
Subject: [PATCH 31/70] prefixItems and items

---
 guidance/library/_json.py | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/guidance/library/_json.py b/guidance/library/_json.py
index 747773445..52b2a9fe3 100644
--- a/guidance/library/_json.py
+++ b/guidance/library/_json.py
@@ -733,7 +733,8 @@ def allOf(
         properties: defaultdict[str, list[JSONSchema]] = defaultdict(list)
         required: set[str] = set()
         additional_properties_list: list[tuple[JSONSchema, set[str]]] = []
-        items_list: list[JSONSchema] = []
+        prefix_items: defaultdict[int, list[JSONSchema]] = defaultdict(list)
+        items_list: list[tuple[JSONSchema, set[int]]] = []
         other_data: dict[str, Any] = {}
         enum: Optional[list[Any]] = None
         const: Union[Unset, Any] = _unset
@@ -815,9 +816,22 @@ def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri
                     (value, exempt_properties)
                 )
 
+            elif key == ArrayKeywords.PREFIX_ITEMS:
+                value = cast(Sequence[JSONSchema], value)
+                for i, schema in enumerate(value):
+                    prefix_items[i].append(schema)
+
             elif key == ArrayKeywords.ITEMS:
+                # TODO: unevaluatedItems?
                 value = cast(JSONSchema, value)
-                items_list.append(value)
+                # We need to keep track of which prefixItems are exempt from this `items` schema,
+                # i.e. the ones defined in the parent schema
+                exempt_prefix_items: set[int] = set()
+                if ArrayKeywords.PREFIX_ITEMS in parent_schema:
+                    exempt_prefix_items = set(range(len(parent_schema[ArrayKeywords.PREFIX_ITEMS])))
+                items_list.append(
+                    (value, exempt_prefix_items)
+                )
 
             elif key in set(Keyword):
                 # If we've done our job right, we should never hit this case...
@@ -862,6 +876,11 @@ def add_schema(schema: JSONSchema, base_uri: str): for name in set(properties) - exempt_properties: properties[name].append(additional_schema) + # Post-process items to make sure we apply the additional items of one schema to the prefix items of another schema + for additional_schema, exempt_prefix_items in items_list: + for i in set(prefix_items) - exempt_prefix_items: + prefix_items[i].append(additional_schema) + if properties: combined_schema[ObjectKeywords.PROPERTIES] = {} for name, schemas in properties.items(): @@ -876,11 +895,19 @@ def add_schema(schema: JSONSchema, base_uri: str): combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES], _ = additional_properties_list[0] else: combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": [schema for schema, _ in additional_properties_list]} + if prefix_items: + combined_schema[ArrayKeywords.PREFIX_ITEMS] = [] + for i in range(len(prefix_items)): + schemas = prefix_items[i] + if len(schemas) == 1: + combined_schema[ArrayKeywords.PREFIX_ITEMS].append(schemas[0]) + else: + combined_schema[ArrayKeywords.PREFIX_ITEMS].append({"allOf": schemas}) if items_list: if len(items_list) == 1: - combined_schema[ArrayKeywords.ITEMS] = items_list[0] + combined_schema[ArrayKeywords.ITEMS], _ = items_list[0] else: - combined_schema[ArrayKeywords.ITEMS] = {"allOf": items_list} + combined_schema[ArrayKeywords.ITEMS] = {"allOf": [schema for schema, _ in items_list]} if enum is not None: combined_schema[Keyword.ENUM] = enum if const is not _unset: From f71cf9197e16e54f75a9c8bdea491f92d8a57749 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 16:50:07 -0700 Subject: [PATCH 32/70] some simple reduction ops --- guidance/library/_json.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 52b2a9fe3..9846f2f06 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -838,7 +838,18 @@ def 
handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri raise NotImplementedError(f"Don't yet know how to handle {key} in allOf") elif key in other_data: - raise NotImplementedError(f"Don't yet know how to reduce multiple values of {key!r} in allOf") + if key in { + NumberKeywords.MINIMUM, NumberKeywords.EXCLUSIVE_MINIMUM, + StringKeywords.MIN_LENGTH, ArrayKeywords.MIN_ITEMS + }: + other_data[key] = max(other_data[key], value) + elif key in { + NumberKeywords.MAXIMUM, NumberKeywords.EXCLUSIVE_MAXIMUM, + StringKeywords.MAX_LENGTH, ArrayKeywords.MAX_ITEMS + }: + other_data[key] = min(other_data[key], value) + else: + raise NotImplementedError(f"Don't yet know how to reduce multiple values of {key!r} in allOf") else: other_data[key] = value From cf765fa21d192b6d76a733aa9448e2d49b4e8a46 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 16:53:20 -0700 Subject: [PATCH 33/70] deterministic order of required properties --- guidance/library/_json.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 9846f2f06..cb6ea3aff 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -731,7 +731,7 @@ def allOf( ): type = set(JSONType) properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) - required: set[str] = set() + required: dict[str, None] = dict() # use a dict for ordered-set behavior additional_properties_list: list[tuple[JSONSchema, set[str]]] = [] prefix_items: defaultdict[int, list[JSONSchema]] = defaultdict(list) items_list: list[tuple[JSONSchema, set[int]]] = [] @@ -802,7 +802,7 @@ def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri elif key == ObjectKeywords.REQUIRED: value = cast(Sequence[str], value) - required |= set(value) + required.update({name: None for name in value}) elif key == ObjectKeywords.ADDITIONAL_PROPERTIES: # TODO: unevaluatedProperties? 
@@ -900,7 +900,7 @@ def add_schema(schema: JSONSchema, base_uri: str): else: combined_schema[ObjectKeywords.PROPERTIES][name] = {"allOf": schemas} if required: - combined_schema[ObjectKeywords.REQUIRED] = required + combined_schema[ObjectKeywords.REQUIRED] = list(required.keys()) if additional_properties_list: if len(additional_properties_list) == 1: combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES], _ = additional_properties_list[0] From a656c4534cd4b6087e4ba9dda192fe7b4b229bcc Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 17:23:19 -0700 Subject: [PATCH 34/70] add tests for prefixItems and items --- tests/unit/library/test_json_allOf.py | 58 +++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 4fd860a72..318fe7cb7 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -378,3 +378,61 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): ) else: raise + + @pytest.mark.parametrize( + "test_object, valid", + [ + # valid: foo is integer and less than 4, bar is equal to 5, baz is integer greater than 5 + ([0, 5, 10], True), + # valid: foo is null, bar is equal to 5, baz is null + ([None, 5, None], True), + # valid: foo is integer and less than 4, bar is non-number, baz is integer greater than 5 + ([0, "quxx", 10], True), + # invalid: foo is integer and greater than 4 + ([5, 5, 10], False), + # invalid: foo is not an integer or None + (["quxx", 5, 10], False), + # invalid: bar is greater than 5 + ([0, 6, 10], False), + # invalid: bar is less than 5 + ([0, 4, 10], False), + # invalid: baz is less than 5 + ([0, 5, 4], False), + # invalid: baz is not an integer or null + ([0, 5, "quxx"], False), + ] + ) + @pytest.mark.parametrize( + "schema", + [ + # The following are equivalent to this: + { + "prefixItems": [{"type": ["integer", "null"], "maximum": 4}, {"minimum": 5, 
"maximum": 5}], + "items": {"type": ["integer", "null"], "minimum": 5} + }, + # items in parent schema + { + "allOf": [ + {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, + ], + "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], + "items": {"type": ["integer", "null"]} + + }, + # items in allOf + { + "allOf": [ + {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, + {"prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], "items": {"type": ["integer", "null"]}} + ] + }, + ] + ) + def test_items_and_prefixitems_in_allOf(self, schema, test_object, valid): + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) From 1cf39103c5fd8054d0fae5932ae41f3144e586e7 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 31 Oct 2024 17:27:15 -0700 Subject: [PATCH 35/70] test for two minimums or maximums --- tests/unit/library/test_json_allOf.py | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/test_json_allOf.py index 318fe7cb7..d6152a664 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/test_json_allOf.py @@ -92,6 +92,51 @@ def test_allOf_simple_types(self, test_object, valid): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # mismatch one + (25, False), + # valid + (35, True), + ], + ) + def test_allOf_simple_minimum(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"minimum": 30}, {"minimum": 20}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + 
with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # mismatch one + (25, False), + # valid + (15, True), + ], + ) + def test_allOf_simple_maximum(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{"maximum": 30}, {"maximum": 20}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( ["test_object", "valid"], [ From dd0f2a459e24d413bc76333df62e490e785a7236 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 09:45:23 -0700 Subject: [PATCH 36/70] refactor json tests into multiple files --- tests/unit/library/json/__init__.py | 0 .../test_allOf.py} | 5 +- tests/unit/library/{ => json}/test_json.py | 1029 +---------------- tests/unit/library/json/test_refs.py | 978 ++++++++++++++++ .../test_string_format.py} | 4 +- tests/unit/library/json/utils.py | 58 + 6 files changed, 1046 insertions(+), 1028 deletions(-) create mode 100644 tests/unit/library/json/__init__.py rename tests/unit/library/{test_json_allOf.py => json/test_allOf.py} (98%) rename tests/unit/library/{ => json}/test_json.py (65%) create mode 100644 tests/unit/library/json/test_refs.py rename tests/unit/library/{test_json_stringformat.py => json/test_string_format.py} (99%) create mode 100644 tests/unit/library/json/utils.py diff --git a/tests/unit/library/json/__init__.py b/tests/unit/library/json/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/library/test_json_allOf.py b/tests/unit/library/json/test_allOf.py similarity index 98% rename from 
tests/unit/library/test_json_allOf.py rename to tests/unit/library/json/test_allOf.py index d6152a664..74aa539b6 100644 --- a/tests/unit/library/test_json_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -1,10 +1,13 @@ +"""Adapted from https://github.com/json-schema-org/JSON-Schema-Test-Suite/tree/9fc880bfb6d8ccd093bc82431f17d13681ffae8e/tests/draft2020-12/allOf.json""" + from json import dumps as json_dumps import pytest from jsonschema import ValidationError, validate from guidance import json as gen_json -from .test_json import check_match_failure, generate_and_check +from .utils import generate_and_check +from .utils import check_match_failure class TestAllOf: diff --git a/tests/unit/library/test_json.py b/tests/unit/library/json/test_json.py similarity index 65% rename from tests/unit/library/test_json.py rename to tests/unit/library/json/test_json.py index 65a915626..d94a98b69 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1,65 +1,14 @@ import json -from functools import partial -from typing import Any, Set, Union, Optional import pytest from jsonschema import validate, ValidationError -from json import dumps as json_dumps, loads as json_loads +from json import dumps as json_dumps from guidance import json as gen_json from guidance import models -from guidance.library._json import IGNORED_KEYS, JSONSchema - -from ...utils import check_match_failure as _check_match_failure -from ...utils import check_run_with_temperature -from ...utils import generate_and_check as _generate_and_check - - -def generate_and_check( - target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None -): - if isinstance(schema_obj, str): - schema_obj = json_loads(schema_obj) - - # Sanity check what we're being asked - validate(instance=target_obj, schema=schema_obj) - prepared_json = json_dumps(target_obj) - assert json.loads(prepared_json) == target_obj - - # Now test that the grammar can recognize 
and generate prepared_json - # We partial in the grammar_callable - if desired_temperature is not None: - grammar_callable = partial( - gen_json, schema=schema_obj, temperature=desired_temperature - ) - else: - grammar_callable = partial(gen_json, schema=schema_obj) - - lm = _generate_and_check( - grammar_callable, - test_string=prepared_json, - ) - check_run_with_temperature(lm, desired_temperature) - - -def check_match_failure( - *, - bad_string: str, - good_bytes: Optional[bytes] = None, - failure_byte: Optional[bytes] = None, - allowed_bytes: Optional[Set[bytes]] = None, - schema_obj: Union[str, JSONSchema], -): - grammar = gen_json(schema=schema_obj) - - _check_match_failure( - bad_string=bad_string, - good_bytes=good_bytes, - failure_byte=failure_byte, - allowed_bytes=allowed_bytes, - grammar=grammar, - ) +from guidance.library._json import IGNORED_KEYS +from .utils import check_match_failure, generate_and_check # Common sets of allowed_bytes @@ -1124,978 +1073,6 @@ def test_bad_with_items( ) -class TestRefs: - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # match - ({"foo": False}, True), - # recursive match - ({"foo": {"foo": False}}, True), - # mismatch - ({"bar": False}, False), - # recursive mismatch - ({"foo": {"bar": False}}, False), - ], - ) - def test_root_pointer_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "properties": {"foo": {"$ref": "#"}}, - "additionalProperties": False, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # match - ({"bar": 3}, True), - # mismatch - ({"bar": True}, False), - ], - ) - def test_relative_pointer_ref_to_object(self, test_object, valid): - schema = { - 
"$schema": "https://json-schema.org/draft/2020-12/schema", - "properties": {"foo": {"type": "integer"}, "bar": {"$ref": "#/properties/foo"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # match array - ([1, 2], True), - # mismatch array - ([1, "foo"], False), - ], - ) - def test_relative_pointer_ref_to_array(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "prefixItems": [{"type": "integer"}, {"$ref": "#/prefixItems/0"}], - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # slash invalid - ({"slash": "aoeu"}, False), - # tilde invalid - ({"tilde": "aoeu"}, False), - # percent invalid - ({"percent": "aoeu"}, False), - # slash valid - ({"slash": 123}, True), - # tilde valid - ({"tilde": 123}, True), - # percent valid - ({"percent": 123}, True), - ], - ) - def test_escaped_pointer_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": { - "tilde~field": {"type": "integer"}, - "slash/field": {"type": "integer"}, - "percent%field": {"type": "integer"}, - }, - "properties": { - "tilde": {"$ref": "#/$defs/tilde~0field"}, - "slash": {"$ref": "#/$defs/slash~1field"}, - "percent": {"$ref": "#/$defs/percent%25field"}, - }, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - 
validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # nested ref valid - (5, True), - # nested ref invalid - ("a", False), - ], - ) - def test_nested_refs(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": { - "a": {"type": "integer"}, - "b": {"$ref": "#/$defs/a"}, - "c": {"$ref": "#/$defs/b"}, - }, - "$ref": "#/$defs/c", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # ref valid, maxItems valid - ({"foo": []}, True), - # ref valid, maxItems invalid - ({"foo": [1, 2, 3]}, False), - # ref invalid - ({"foo": "string"}, False), - ], - ) - def test_ref_applies_alongside_sibling_keywords(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": {"reffed": {"type": "array"}}, - "properties": {"foo": {"$ref": "#/$defs/reffed", "maxItems": 2}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # remote ref valid - ({"minLength": 1}, True), - # remote ref invalid - ({"minLength": -1}, False), - ], - ) - @pytest.mark.xfail(reason="Remote refs are not supported") - def test_remote_ref_containing_refs_itself(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": 
"https://json-schema.org/draft/2020-12/schema", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # property named $ref valid - ({"$ref": "a"}, True), - # property named $ref invalid - ({"$ref": 2}, False), - ], - ) - def test_property_named_ref_that_is_not_a_reference(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "properties": {"$ref": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # property named $ref valid - ({"$ref": "a"}, True), - # property named $ref invalid - ({"$ref": 2}, False), - ], - ) - def test_property_named_ref_containing_an_actual_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "properties": {"$ref": {"$ref": "#/$defs/is-string"}}, - "$defs": {"is-string": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # any value is valid - ("foo", True) - ], - ) - def test_ref_to_boolean_schema_true(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "#/$defs/bool", - "$defs": 
{"bool": True}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # any value is invalid - ("foo", False) - ], - ) - @pytest.mark.xfail(reason="false schema is not implemented") - def test_ref_to_boolean_schema_false(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "#/$defs/bool", - "$defs": {"bool": False}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # valid tree - ( - { - "meta": "root", - "nodes": [ - { - "value": 1, - "subtree": { - "meta": "child", - "nodes": [{"value": 1.1}, {"value": 1.2}], - }, - }, - { - "value": 2, - "subtree": { - "meta": "child", - "nodes": [{"value": 2.1}, {"value": 2.2}], - }, - }, - ], - }, - True, - ), - # invalid tree - ( - { - "meta": "root", - "nodes": [ - { - "value": 1, - "subtree": { - "meta": "child", - "nodes": [{"value": "string is invalid"}, {"value": 1.2}], - }, - }, - { - "value": 2, - "subtree": { - "meta": "child", - "nodes": [{"value": 2.1}, {"value": 2.2}], - }, - }, - ], - }, - False, - ), - ], - ) - def test_Recursive_references_between_schemas(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://localhost:1234/draft2020-12/tree", - "description": "tree of nodes", - "type": "object", - "properties": { - "meta": {"type": "string"}, - "nodes": {"type": "array", "items": {"$ref": "node"}}, - }, - "required": 
["meta", "nodes"], - "$defs": { - "node": { - "$id": "http://localhost:1234/draft2020-12/node", - "description": "node", - "type": "object", - "properties": {"value": {"type": "number"}, "subtree": {"$ref": "tree"}}, - "required": ["value"], - } - }, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # object with numbers is valid - ({'foo"bar': 1}, True), - # object with strings is invalid - ({'foo"bar': "1"}, False), - ], - ) - def test_refs_with_quote(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "properties": {'foo"bar': {"$ref": "#/$defs/foo%22bar"}}, - "$defs": {'foo"bar': {"type": "number"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # referenced subschema doesn't see annotations from properties - ({"prop1": "match"}, False) - ], - ) - @pytest.mark.xfail(reason="unevaluatedProperties is not implemented") - def test_ref_creates_new_scope_when_adjacent_to_keywords(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": {"A": {"unevaluatedProperties": False}}, - "properties": {"prop1": {"type": "string"}}, - "$ref": "#/$defs/A", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # do not evaluate the $ref inside the enum, matching any string - ("this is a string", False), - # do not evaluate the $ref inside the enum, definition exact match - ({"type": "string"}, False), - # match the enum exactly - ({"$ref": "#/$defs/a_string"}, True), - ], - ) - def test_naive_replacement_of_ref_with_its_destination_is_not_correct( - self, test_object, valid - ): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": {"a_string": {"type": "string"}}, - "enum": [{"$ref": "#/$defs/a_string"}], - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # invalid on inner field - ({"bar": "a", "foo": {"bar": 1}}, False), - # invalid on outer field - ({ "bar": 1, "foo": {"bar": "a"}}, False), - # valid on both fields - ({"bar": "a", "foo": {"bar": "a"}, }, True), - ], - ) - def test_refs_with_relative_uris_and_defs(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://example.com/schema-relative-uri-defs1.json", - "properties": { - "foo": { - "$id": "schema-relative-uri-defs2.json", - "$defs": {"inner": {"properties": {"bar": {"type": "string"}}}}, - "$ref": "#/$defs/inner", - } - }, - "$ref": "schema-relative-uri-defs2.json", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", 
"valid"], - [ - # invalid on inner field - ({"bar": "a", "foo": {"bar": 1}}, False), - # invalid on outer field - ({"bar": 1, "foo": {"bar": "a"}}, False), - # valid on both fields - ({"bar": "a", "foo": {"bar": "a"}}, True), - ], - ) - def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://example.com/schema-refs-absolute-uris-defs1.json", - "properties": { - "foo": { - "$id": "http://example.com/schema-refs-absolute-uris-defs2.json", - "$defs": {"inner": {"properties": {"bar": {"type": "string"}}}}, - "$ref": "#/$defs/inner", - } - }, - "$ref": "schema-refs-absolute-uris-defs2.json", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # number is valid - (1, True), - # non-number is invalid - ("a", False), - ], - ) - def test_id_must_be_resolved_against_nearest_parent_not_just_immediate_parent( - self, test_object, valid - ): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://example.com/a.json", - "$defs": { - "x": { - "$id": "http://example.com/b/c.json", - "not": {"$defs": {"y": {"$id": "d.json", "type": "number"}}}, - } - }, - "allOf": [{"$ref": "http://example.com/b/d.json"}], - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # data is valid against first definition - (5, True), - # data is invalid against first definition - (50, 
False), - ], - ) - def test_order_of_evaluation_id_and_ref(self, test_object, valid): - schema = { - "$comment": "$id must be evaluated before $ref to get the proper $ref destination", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://example.com/draft2020-12/ref-and-id1/base.json", - "$ref": "int.json", - "$defs": { - "bigint": { - "$comment": "canonical uri: https://example.com/ref-and-id1/int.json", - "$id": "int.json", - "maximum": 10, - }, - "smallint": { - "$comment": "canonical uri: https://example.com/ref-and-id1-int.json", - "$id": "/draft2020-12/ref-and-id1-int.json", - "maximum": 2, - }, - }, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # data is valid against first definition - (5, True), - # data is invalid against first definition - (50, False), - ], - ) - def test_order_of_evaluation_id_and_anchor_and_ref(self, test_object, valid): - schema = { - "$comment": "$id must be evaluated before $ref to get the proper $ref destination", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://example.com/draft2020-12/ref-and-id2/base.json", - "$ref": "#bigint", - "$defs": { - "bigint": { - "$comment": "canonical uri: /ref-and-id2/base.json#/$defs/bigint; another valid uri for this location: /ref-and-id2/base.json#bigint", - "$anchor": "bigint", - "maximum": 10, - }, - "smallint": { - "$comment": "canonical uri: https://example.com/ref-and-id2#/$defs/smallint; another valid uri for this location: https://example.com/ref-and-id2/#bigint", - "$id": "https://example.com/draft2020-12/ref-and-id2/", - "$anchor": "bigint", - "maximum": 2, - }, - }, - } - if valid: - validate(instance=test_object, schema=schema) - 
generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # valid under the URN IDed schema - ({"foo": 37}, True), - # invalid under the URN IDed schema - ({"foo": 12}, False), - ], - ) - def test_simple_URN_base_URI_with_ref_via_the_URN(self, test_object, valid): - schema = { - "$comment": "URIs do not have to have HTTP(s) schemes", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:uuid:deadbeef-1234-ffff-ffff-4321feebdaed", - "minimum": 30, - "properties": {"foo": {"$ref": "urn:uuid:deadbeef-1234-ffff-ffff-4321feebdaed"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - ({"foo": 12}, False), - ], - ) - def test_simple_URN_base_URI_with_JSON_pointer(self, test_object, valid): - schema = { - "$comment": "URIs do not have to have HTTP(s) schemes", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:uuid:deadbeef-1234-00ff-ff00-4321feebdaed", - "properties": {"foo": {"$ref": "#/$defs/bar"}}, - "$defs": {"bar": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - 
({"foo": 12}, False), - ], - ) - def test_URN_base_URI_with_NSS(self, test_object, valid): - schema = { - "$comment": "RFC 8141 §2.2", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:example:1/406/47452/2", - "properties": {"foo": {"$ref": "#/$defs/bar"}}, - "$defs": {"bar": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - ({"foo": 12}, False), - ], - ) - def test_URN_base_URI_with_r_component(self, test_object, valid): - schema = { - "$comment": "RFC 8141 §2.3.1", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:example:foo-bar-baz-qux?+CCResolve:cc=uk", - "properties": {"foo": {"$ref": "#/$defs/bar"}}, - "$defs": {"bar": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - ({"foo": 12}, False), - ], - ) - def test_URN_base_URI_with_q_component(self, test_object, valid): - schema = { - "$comment": "RFC 8141 §2.3.2", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:example:weather?=op=map&lat=39.56&lon=-104.85&datetime=1969-07-21T02:56:15Z", - "properties": {"foo": {"$ref": "#/$defs/bar"}}, - "$defs": {"bar": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, 
schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - ({"foo": 12}, False), - ], - ) - def test_URN_base_URI_with_URN_and_JSON_pointer_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:uuid:deadbeef-1234-0000-0000-4321feebdaed", - "properties": { - "foo": {"$ref": "urn:uuid:deadbeef-1234-0000-0000-4321feebdaed#/$defs/bar"} - }, - "$defs": {"bar": {"type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ({"foo": "bar"}, True), - # a non-string is invalid - ({"foo": 12}, False), - ], - ) - def test_URN_base_URI_with_URN_and_anchor_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "urn:uuid:deadbeef-1234-ff00-00ff-4321feebdaed", - "properties": { - "foo": {"$ref": "urn:uuid:deadbeef-1234-ff00-00ff-4321feebdaed#something"} - }, - "$defs": {"bar": {"$anchor": "something", "type": "string"}}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ("bar", True), - # a non-string is invalid - (12, False), - ], - ) - def 
test_URN_ref_with_nested_pointer_ref(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "urn:uuid:deadbeef-4321-ffff-ffff-1234feebdaed", - "$defs": { - "foo": { - "$id": "urn:uuid:deadbeef-4321-ffff-ffff-1234feebdaed", - "$defs": {"bar": {"type": "string"}}, - "$ref": "#/$defs/bar", - } - }, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a non-integer is invalid due to the $ref - ("foo", False), - # an integer is valid - (12, True), - ], - ) - @pytest.mark.xfail(reason="if not implemented") - def test_ref_to_if(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "http://example.com/ref/if", - "if": {"$id": "http://example.com/ref/if", "type": "integer"}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a non-integer is invalid due to the $ref - ("foo", False), - # an integer is valid - (12, True), - ], - ) - @pytest.mark.xfail(reason="then not implemented") - def test_ref_to_then(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "http://example.com/ref/then", - "then": {"$id": "http://example.com/ref/then", "type": "integer"}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - 
validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a non-integer is invalid due to the $ref - ("foo", False), - # an integer is valid - (12, True), - ], - ) - @pytest.mark.xfail(reason="else not implemented") - def test_ref_to_else(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "http://example.com/ref/else", - "else": {"$id": "http://example.com/ref/else", "type": "integer"}, - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # a string is valid - ("foo", True), - # an integer is invalid - (12, False), - ], - ) - def test_ref_with_absolute_path_reference(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://example.com/ref/absref.json", - "$defs": { - "a": {"$id": "http://example.com/ref/absref/foobar.json", "type": "number"}, - "b": {"$id": "http://example.com/absref/foobar.json", "type": "string"}, - }, - "$ref": "/absref/foobar.json", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # number is valid - (1, True), - # non-number is invalid - ("a", False), - ], - ) - def test_id_with_file_URI_still_resolves_pointers___nix(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": 
"file:///folder/file.json", - "$defs": {"foo": {"type": "number"}}, - "$ref": "#/$defs/foo", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # number is valid - (1, True), - # non-number is invalid - ("a", False), - ], - ) - def test_id_with_file_URI_still_resolves_pointers___windows(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "file:///c:/folder/file.json", - "$defs": {"foo": {"type": "number"}}, - "$ref": "#/$defs/foo", - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - @pytest.mark.parametrize( - ["test_object", "valid"], - [ - # number is valid - (1, True), - # non-number is invalid - ("a", False), - ], - ) - def test_empty_tokens_in_ref_json_pointer(self, test_object, valid): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": {"": {"$defs": {"": {"type": "number"}}}}, - "allOf": [{"$ref": "#/$defs//$defs/"}], - } - if valid: - validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - - class TestAnyOf: @pytest.mark.parametrize("target_obj", [123, True]) @pytest.mark.parametrize("temperature", [None, 0.1, 1]) diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py new file mode 100644 index 000000000..49f035283 --- 
/dev/null +++ b/tests/unit/library/json/test_refs.py @@ -0,0 +1,978 @@ +from .utils import check_match_failure, generate_and_check + +import pytest +from jsonschema import ValidationError, validate + +from json import dumps as json_dumps + + +class TestRefs: + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # match + ({"foo": False}, True), + # recursive match + ({"foo": {"foo": False}}, True), + # mismatch + ({"bar": False}, False), + # recursive mismatch + ({"foo": {"bar": False}}, False), + ], + ) + def test_root_pointer_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"foo": {"$ref": "#"}}, + "additionalProperties": False, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # match + ({"bar": 3}, True), + # mismatch + ({"bar": True}, False), + ], + ) + def test_relative_pointer_ref_to_object(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"foo": {"type": "integer"}, "bar": {"$ref": "#/properties/foo"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # match array + ([1, 2], True), + # mismatch array + ([1, "foo"], False), + ], + ) + def test_relative_pointer_ref_to_array(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "prefixItems": [{"type": "integer"}, {"$ref": 
"#/prefixItems/0"}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # slash invalid + ({"slash": "aoeu"}, False), + # tilde invalid + ({"tilde": "aoeu"}, False), + # percent invalid + ({"percent": "aoeu"}, False), + # slash valid + ({"slash": 123}, True), + # tilde valid + ({"tilde": 123}, True), + # percent valid + ({"percent": 123}, True), + ], + ) + def test_escaped_pointer_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "tilde~field": {"type": "integer"}, + "slash/field": {"type": "integer"}, + "percent%field": {"type": "integer"}, + }, + "properties": { + "tilde": {"$ref": "#/$defs/tilde~0field"}, + "slash": {"$ref": "#/$defs/slash~1field"}, + "percent": {"$ref": "#/$defs/percent%25field"}, + }, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # nested ref valid + (5, True), + # nested ref invalid + ("a", False), + ], + ) + def test_nested_refs(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "a": {"type": "integer"}, + "b": {"$ref": "#/$defs/a"}, + "c": {"$ref": "#/$defs/b"}, + }, + "$ref": "#/$defs/c", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # ref valid, maxItems valid + ({"foo": []}, True), + # ref valid, maxItems invalid + ({"foo": [1, 2, 3]}, False), + # ref invalid + ({"foo": "string"}, False), + ], + ) + def test_ref_applies_alongside_sibling_keywords(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": {"reffed": {"type": "array"}}, + "properties": {"foo": {"$ref": "#/$defs/reffed", "maxItems": 2}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # remote ref valid + ({"minLength": 1}, True), + # remote ref invalid + ({"minLength": -1}, False), + ], + ) + @pytest.mark.xfail(reason="Remote refs are not supported") + def test_remote_ref_containing_refs_itself(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "https://json-schema.org/draft/2020-12/schema", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # property named $ref valid + ({"$ref": "a"}, True), + # property named $ref invalid + ({"$ref": 2}, False), + ], + ) + def test_property_named_ref_that_is_not_a_reference(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"$ref": {"type": "string"}}, + } + if valid: + validate(instance=test_object, 
schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # property named $ref valid + ({"$ref": "a"}, True), + # property named $ref invalid + ({"$ref": 2}, False), + ], + ) + def test_property_named_ref_containing_an_actual_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"$ref": {"$ref": "#/$defs/is-string"}}, + "$defs": {"is-string": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any value is valid + ("foo", True) + ], + ) + def test_ref_to_boolean_schema_true(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/bool", + "$defs": {"bool": True}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # any value is invalid + ("foo", False) + ], + ) + @pytest.mark.xfail(reason="false schema is not implemented") + def test_ref_to_boolean_schema_false(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/bool", + "$defs": {"bool": False}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, 
schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # valid tree + ( + { + "meta": "root", + "nodes": [ + { + "value": 1, + "subtree": { + "meta": "child", + "nodes": [{"value": 1.1}, {"value": 1.2}], + }, + }, + { + "value": 2, + "subtree": { + "meta": "child", + "nodes": [{"value": 2.1}, {"value": 2.2}], + }, + }, + ], + }, + True, + ), + # invalid tree + ( + { + "meta": "root", + "nodes": [ + { + "value": 1, + "subtree": { + "meta": "child", + "nodes": [{"value": "string is invalid"}, {"value": 1.2}], + }, + }, + { + "value": 2, + "subtree": { + "meta": "child", + "nodes": [{"value": 2.1}, {"value": 2.2}], + }, + }, + ], + }, + False, + ), + ], + ) + def test_Recursive_references_between_schemas(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://localhost:1234/draft2020-12/tree", + "description": "tree of nodes", + "type": "object", + "properties": { + "meta": {"type": "string"}, + "nodes": {"type": "array", "items": {"$ref": "node"}}, + }, + "required": ["meta", "nodes"], + "$defs": { + "node": { + "$id": "http://localhost:1234/draft2020-12/node", + "description": "node", + "type": "object", + "properties": {"value": {"type": "number"}, "subtree": {"$ref": "tree"}}, + "required": ["value"], + } + }, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # object with numbers is valid + ({'foo"bar': 1}, True), + # object with strings is invalid + ({'foo"bar': "1"}, False), + ], + ) + def test_refs_with_quote(self, test_object, 
valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {'foo"bar': {"$ref": "#/$defs/foo%22bar"}}, + "$defs": {'foo"bar': {"type": "number"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # referenced subschema doesn't see annotations from properties + ({"prop1": "match"}, False) + ], + ) + @pytest.mark.xfail(reason="unevaluatedProperties is not implemented") + def test_ref_creates_new_scope_when_adjacent_to_keywords(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": {"A": {"unevaluatedProperties": False}}, + "properties": {"prop1": {"type": "string"}}, + "$ref": "#/$defs/A", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # do not evaluate the $ref inside the enum, matching any string + ("this is a string", False), + # do not evaluate the $ref inside the enum, definition exact match + ({"type": "string"}, False), + # match the enum exactly + ({"$ref": "#/$defs/a_string"}, True), + ], + ) + def test_naive_replacement_of_ref_with_its_destination_is_not_correct( + self, test_object, valid + ): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": {"a_string": {"type": "string"}}, + "enum": [{"$ref": "#/$defs/a_string"}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with 
pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # invalid on inner field + ({"bar": "a", "foo": {"bar": 1}}, False), + # invalid on outer field + ({ "bar": 1, "foo": {"bar": "a"}}, False), + # valid on both fields + ({"bar": "a", "foo": {"bar": "a"}, }, True), + ], + ) + def test_refs_with_relative_uris_and_defs(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://example.com/schema-relative-uri-defs1.json", + "properties": { + "foo": { + "$id": "schema-relative-uri-defs2.json", + "$defs": {"inner": {"properties": {"bar": {"type": "string"}}}}, + "$ref": "#/$defs/inner", + } + }, + "$ref": "schema-relative-uri-defs2.json", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # invalid on inner field + ({"bar": "a", "foo": {"bar": 1}}, False), + # invalid on outer field + ({"bar": 1, "foo": {"bar": "a"}}, False), + # valid on both fields + ({"bar": "a", "foo": {"bar": "a"}}, True), + ], + ) + def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://example.com/schema-refs-absolute-uris-defs1.json", + "properties": { + "foo": { + "$id": "http://example.com/schema-refs-absolute-uris-defs2.json", + "$defs": {"inner": {"properties": {"bar": {"type": "string"}}}}, + "$ref": "#/$defs/inner", + } + }, + "$ref": "schema-refs-absolute-uris-defs2.json", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, 
schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # non-number is invalid + ("a", False), + ], + ) + def test_id_must_be_resolved_against_nearest_parent_not_just_immediate_parent( + self, test_object, valid + ): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://example.com/a.json", + "$defs": { + "x": { + "$id": "http://example.com/b/c.json", + "not": {"$defs": {"y": {"$id": "d.json", "type": "number"}}}, + } + }, + "allOf": [{"$ref": "http://example.com/b/d.json"}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # data is valid against first definition + (5, True), + # data is invalid against first definition + (50, False), + ], + ) + def test_order_of_evaluation_id_and_ref(self, test_object, valid): + schema = { + "$comment": "$id must be evaluated before $ref to get the proper $ref destination", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/draft2020-12/ref-and-id1/base.json", + "$ref": "int.json", + "$defs": { + "bigint": { + "$comment": "canonical uri: https://example.com/ref-and-id1/int.json", + "$id": "int.json", + "maximum": 10, + }, + "smallint": { + "$comment": "canonical uri: https://example.com/ref-and-id1-int.json", + "$id": "/draft2020-12/ref-and-id1-int.json", + "maximum": 2, + }, + }, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + 
validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # data is valid against first definition + (5, True), + # data is invalid against first definition + (50, False), + ], + ) + def test_order_of_evaluation_id_and_anchor_and_ref(self, test_object, valid): + schema = { + "$comment": "$id must be evaluated before $ref to get the proper $ref destination", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/draft2020-12/ref-and-id2/base.json", + "$ref": "#bigint", + "$defs": { + "bigint": { + "$comment": "canonical uri: /ref-and-id2/base.json#/$defs/bigint; another valid uri for this location: /ref-and-id2/base.json#bigint", + "$anchor": "bigint", + "maximum": 10, + }, + "smallint": { + "$comment": "canonical uri: https://example.com/ref-and-id2#/$defs/smallint; another valid uri for this location: https://example.com/ref-and-id2/#bigint", + "$id": "https://example.com/draft2020-12/ref-and-id2/", + "$anchor": "bigint", + "maximum": 2, + }, + }, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # valid under the URN IDed schema + ({"foo": 37}, True), + # invalid under the URN IDed schema + ({"foo": 12}, False), + ], + ) + def test_simple_URN_base_URI_with_ref_via_the_URN(self, test_object, valid): + schema = { + "$comment": "URIs do not have to have HTTP(s) schemes", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:uuid:deadbeef-1234-ffff-ffff-4321feebdaed", + "minimum": 30, + "properties": {"foo": {"$ref": "urn:uuid:deadbeef-1234-ffff-ffff-4321feebdaed"}}, + } + if valid: + 
validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def test_simple_URN_base_URI_with_JSON_pointer(self, test_object, valid): + schema = { + "$comment": "URIs do not have to have HTTP(s) schemes", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:uuid:deadbeef-1234-00ff-ff00-4321feebdaed", + "properties": {"foo": {"$ref": "#/$defs/bar"}}, + "$defs": {"bar": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def test_URN_base_URI_with_NSS(self, test_object, valid): + schema = { + "$comment": "RFC 8141 §2.2", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:example:1/406/47452/2", + "properties": {"foo": {"$ref": "#/$defs/bar"}}, + "$defs": {"bar": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def 
test_URN_base_URI_with_r_component(self, test_object, valid): + schema = { + "$comment": "RFC 8141 §2.3.1", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:example:foo-bar-baz-qux?+CCResolve:cc=uk", + "properties": {"foo": {"$ref": "#/$defs/bar"}}, + "$defs": {"bar": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def test_URN_base_URI_with_q_component(self, test_object, valid): + schema = { + "$comment": "RFC 8141 §2.3.2", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:example:weather?=op=map&lat=39.56&lon=-104.85&datetime=1969-07-21T02:56:15Z", + "properties": {"foo": {"$ref": "#/$defs/bar"}}, + "$defs": {"bar": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def test_URN_base_URI_with_URN_and_JSON_pointer_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:uuid:deadbeef-1234-0000-0000-4321feebdaed", + "properties": { + "foo": {"$ref": "urn:uuid:deadbeef-1234-0000-0000-4321feebdaed#/$defs/bar"} + }, + "$defs": {"bar": {"type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + 
generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ({"foo": "bar"}, True), + # a non-string is invalid + ({"foo": 12}, False), + ], + ) + def test_URN_base_URI_with_URN_and_anchor_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:uuid:deadbeef-1234-ff00-00ff-4321feebdaed", + "properties": { + "foo": {"$ref": "urn:uuid:deadbeef-1234-ff00-00ff-4321feebdaed#something"} + }, + "$defs": {"bar": {"$anchor": "something", "type": "string"}}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ("bar", True), + # a non-string is invalid + (12, False), + ], + ) + def test_URN_ref_with_nested_pointer_ref(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "urn:uuid:deadbeef-4321-ffff-ffff-1234feebdaed", + "$defs": { + "foo": { + "$id": "urn:uuid:deadbeef-4321-ffff-ffff-1234feebdaed", + "$defs": {"bar": {"type": "string"}}, + "$ref": "#/$defs/bar", + } + }, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a non-integer is invalid due to the $ref + ("foo", False), + # an integer is valid + (12, True), + ], + ) + 
@pytest.mark.xfail(reason="if not implemented") + def test_ref_to_if(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "http://example.com/ref/if", + "if": {"$id": "http://example.com/ref/if", "type": "integer"}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a non-integer is invalid due to the $ref + ("foo", False), + # an integer is valid + (12, True), + ], + ) + @pytest.mark.xfail(reason="then not implemented") + def test_ref_to_then(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "http://example.com/ref/then", + "then": {"$id": "http://example.com/ref/then", "type": "integer"}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a non-integer is invalid due to the $ref + ("foo", False), + # an integer is valid + (12, True), + ], + ) + @pytest.mark.xfail(reason="else not implemented") + def test_ref_to_else(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "http://example.com/ref/else", + "else": {"$id": "http://example.com/ref/else", "type": "integer"}, + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # a string is valid + ("foo", True), + # an integer is invalid + (12, False), + ], + ) + def test_ref_with_absolute_path_reference(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://example.com/ref/absref.json", + "$defs": { + "a": {"$id": "http://example.com/ref/absref/foobar.json", "type": "number"}, + "b": {"$id": "http://example.com/absref/foobar.json", "type": "string"}, + }, + "$ref": "/absref/foobar.json", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # non-number is invalid + ("a", False), + ], + ) + def test_id_with_file_URI_still_resolves_pointers___nix(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "file:///folder/file.json", + "$defs": {"foo": {"type": "number"}}, + "$ref": "#/$defs/foo", + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # non-number is invalid + ("a", False), + ], + ) + def test_id_with_file_URI_still_resolves_pointers___windows(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "file:///c:/folder/file.json", + "$defs": {"foo": {"type": "number"}}, + "$ref": "#/$defs/foo", 
+ } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + + @pytest.mark.parametrize( + ["test_object", "valid"], + [ + # number is valid + (1, True), + # non-number is invalid + ("a", False), + ], + ) + def test_empty_tokens_in_ref_json_pointer(self, test_object, valid): + schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": {"": {"$defs": {"": {"type": "number"}}}}, + "allOf": [{"$ref": "#/$defs//$defs/"}], + } + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + else: + with pytest.raises(ValidationError): + validate(instance=test_object, schema=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) \ No newline at end of file diff --git a/tests/unit/library/test_json_stringformat.py b/tests/unit/library/json/test_string_format.py similarity index 99% rename from tests/unit/library/test_json_stringformat.py rename to tests/unit/library/json/test_string_format.py index b484ccca2..3259274bb 100644 --- a/tests/unit/library/test_json_stringformat.py +++ b/tests/unit/library/json/test_string_format.py @@ -2,7 +2,9 @@ import pytest import json -from .test_json import generate_and_check, check_match_failure + +from .utils import generate_and_check +from .utils import check_match_failure class TestDate: diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py new file mode 100644 index 000000000..920571d27 --- /dev/null +++ b/tests/unit/library/json/utils.py @@ -0,0 +1,58 @@ +from typing import Union, Optional, Any, Set +from guidance import json as gen_json +from guidance.library._json import JSONSchema + +from ....utils import check_match_failure as _check_match_failure, check_run_with_temperature, generate_and_check 
as _generate_and_check + +from jsonschema import validate + + +import json +from functools import partial +from json import dumps as json_dumps, loads as json_loads + + +def generate_and_check( + target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None +): + if isinstance(schema_obj, str): + schema_obj = json_loads(schema_obj) + + # Sanity check what we're being asked + validate(instance=target_obj, schema=schema_obj) + prepared_json = json_dumps(target_obj) + assert json.loads(prepared_json) == target_obj + + # Now test that the grammar can recognize and generate prepared_json + # We partial in the grammar_callable + if desired_temperature is not None: + grammar_callable = partial( + gen_json, schema=schema_obj, temperature=desired_temperature + ) + else: + grammar_callable = partial(gen_json, schema=schema_obj) + + lm = _generate_and_check( + grammar_callable, + test_string=prepared_json, + ) + check_run_with_temperature(lm, desired_temperature) + + +def check_match_failure( + *, + bad_string: str, + good_bytes: Optional[bytes] = None, + failure_byte: Optional[bytes] = None, + allowed_bytes: Optional[Set[bytes]] = None, + schema_obj: Union[str, JSONSchema], +): + grammar = gen_json(schema=schema_obj) + + _check_match_failure( + bad_string=bad_string, + good_bytes=good_bytes, + failure_byte=failure_byte, + allowed_bytes=allowed_bytes, + grammar=grammar, + ) \ No newline at end of file From 496718ed7e8a0cc62744401d174831e03ac352f1 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 09:46:27 -0700 Subject: [PATCH 37/70] blacken json tests --- tests/unit/library/json/test_allOf.py | 61 +++-- tests/unit/library/json/test_json.py | 259 ++++++++---------- tests/unit/library/json/test_refs.py | 16 +- tests/unit/library/json/test_string_format.py | 140 +++++++--- tests/unit/library/json/utils.py | 28 +- 5 files changed, 291 insertions(+), 213 deletions(-) diff --git a/tests/unit/library/json/test_allOf.py 
b/tests/unit/library/json/test_allOf.py index 74aa539b6..1a388d2ce 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -6,8 +6,8 @@ from jsonschema import ValidationError, validate from guidance import json as gen_json -from .utils import generate_and_check -from .utils import check_match_failure + +from .utils import check_match_failure, generate_and_check class TestAllOf: @@ -139,7 +139,6 @@ def test_allOf_simple_maximum(self, test_object, valid): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - @pytest.mark.parametrize( ["test_object", "valid"], [ @@ -358,15 +357,18 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): ({"foo": 0, "bar": 5, "baz": 4}, False), # invalid: baz is not an integer or null ({"foo": 0, "bar": 5, "baz": "quxx"}, False), - ] + ], ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "properties": {"foo": {"type": ["integer", "null"], "maximum": 4}, "bar": {"minimum": 5, "maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"], "minimum": 5} + "properties": { + "foo": {"type": ["integer", "null"], "maximum": 4}, + "bar": {"minimum": 5, "maximum": 5}, + }, + "additionalProperties": {"type": ["integer", "null"], "minimum": 5}, }, # additionalProperties in parent schema { @@ -374,16 +376,22 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}} ], "properties": {"bar": {"maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"]} + "additionalProperties": {"type": ["integer", "null"]}, }, # additionalProperties in allOf { "allOf": [ - {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}}, - {"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}} + { + "properties": {"foo": {"maximum": 4}}, + 
"additionalProperties": {"minimum": 5}, + }, + { + "properties": {"bar": {"maximum": 5}}, + "additionalProperties": {"type": ["integer", "null"]}, + }, ] }, - ] + ], ) def test_additionalProperties_in_allOf(self, schema, test_object, valid): if valid: @@ -397,19 +405,19 @@ def test_additionalProperties_in_allOf(self, schema, test_object, valid): @pytest.mark.parametrize( "test_object, valid", [ - ({}, True), # empty object is valid - ({"foo": 1}, False), # foo is not a string - ({"foo": "x"}, False), # foo is not an integer - ({"foo": True}, False), # foo is not a string or an integer - ] + ({}, True), # empty object is valid + ({"foo": 1}, False), # foo is not a string + ({"foo": "x"}, False), # foo is not an integer + ({"foo": True}, False), # foo is not a string or an integer + ], ) def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): schema = { "type": "object", "allOf": [ {"additionalProperties": {"type": "integer"}}, - {"additionalProperties": {"type": "string"}} - ] + {"additionalProperties": {"type": "string"}}, + ], } try: if valid: @@ -448,15 +456,18 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): ([0, 5, 4], False), # invalid: baz is not an integer or null ([0, 5, "quxx"], False), - ] + ], ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "prefixItems": [{"type": ["integer", "null"], "maximum": 4}, {"minimum": 5, "maximum": 5}], - "items": {"type": ["integer", "null"], "minimum": 5} + "prefixItems": [ + {"type": ["integer", "null"], "maximum": 4}, + {"minimum": 5, "maximum": 5}, + ], + "items": {"type": ["integer", "null"], "minimum": 5}, }, # items in parent schema { @@ -464,17 +475,19 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, ], "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], - "items": {"type": ["integer", "null"]} - + "items": {"type": 
["integer", "null"]}, }, # items in allOf { "allOf": [ {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, - {"prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], "items": {"type": ["integer", "null"]}} + { + "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], + "items": {"type": ["integer", "null"]}, + }, ] }, - ] + ], ) def test_items_and_prefixitems_in_allOf(self, schema, test_object, valid): if valid: diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index d94a98b69..f3602fddd 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1,21 +1,21 @@ import json +from json import dumps as json_dumps import pytest -from jsonschema import validate, ValidationError -from json import dumps as json_dumps +from jsonschema import ValidationError, validate from guidance import json as gen_json from guidance import models - from guidance.library._json import IGNORED_KEYS -from .utils import check_match_failure, generate_and_check +from .utils import check_match_failure, generate_and_check # Common sets of allowed_bytes INTEGER_LEADING = {b"-", b"0", *{bytes([i]) for i in range(ord("1"), ord("9") + 1)}} INTEGER_FOLLOWING = {bytes([i]) for i in range(ord("0"), ord("9") + 1)} A_to_Z = {bytes([i]) for i in range(ord("A"), ord("Z") + 1)} + def test_null(): schema = """{"type": "null" }""" @@ -130,6 +130,7 @@ def test_bad_number(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) + class TestBoundedNumeric: @pytest.mark.parametrize( "instance, schema, should_pass", @@ -139,11 +140,15 @@ class TestBoundedNumeric: (-5, {"type": "integer", "minimum": -5}, True), pytest.param( *(5.0, {"type": "integer", "minimum": 5}, True), - marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") + marks=pytest.mark.xfail( + reason="JSON technically allows trailing zeroes, but we currently don't" + ), ), 
pytest.param( *(-5.0, {"type": "integer", "minimum": -5}, True), - marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") + marks=pytest.mark.xfail( + reason="JSON technically allows trailing zeroes, but we currently don't" + ), ), (5.1, {"type": "integer", "minimum": 5}, False), (-5.1, {"type": "integer", "minimum": -5}, False), @@ -203,7 +208,11 @@ class TestBoundedNumeric: (5.1, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-9.9, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), (5.0, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, False), - (-10.0, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, False), + ( + -10.0, + {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, + False, + ), (9.9, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-5.1, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), # --- Edge cases --- @@ -244,10 +253,10 @@ class TestBoundedNumeric: (0.2999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, True), (-0.2999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, True), (0.0999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), - (-0.0999, {"type": "number", "minimum": -.3, "maximum": -0.1}, False), + (-0.0999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), (0.3001, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), (-0.3001, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), - ] + ], ) def test_numeric_validation(self, instance, schema, should_pass): # Sanity check @@ -257,10 +266,7 @@ def test_numeric_validation(self, instance, schema, should_pass): else: with pytest.raises(ValidationError): validate(instance, schema=schema) - check_match_failure( - bad_string=json_dumps(instance), - schema_obj=schema - ) + check_match_failure(bad_string=json_dumps(instance), 
schema_obj=schema) class TestString: @@ -341,9 +347,7 @@ def test_regex_bad(self, bad_string: str, good_bytes, failure_byte, allowed_byte schema_obj=schema_obj, ) - @pytest.mark.parametrize( - "string", ["aA\u001f", '"""'] - ) + @pytest.mark.parametrize("string", ["aA\u001f", '"""']) def test_regex_properly_escaped_good(self, string): schema_obj = {"type": "string", "pattern": r".{3}"} # First sanity check what we're setting up @@ -356,13 +360,15 @@ def test_regex_properly_escaped_good(self, string): [ ( '"\\u001f\\u001f\u001f', - b'"\\u001f\\u001f', # able to match the first two stringified bytes - '\u001f'.encode(), # fails on a literal \x1f byte - None # hard to write a set of allowed bytes here + b'"\\u001f\\u001f', # able to match the first two stringified bytes + "\u001f".encode(), # fails on a literal \x1f byte + None, # hard to write a set of allowed bytes here ), ], ) - def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_byte, allowed_bytes): + def test_regex_properly_escaped_bad( + self, bad_string: str, good_bytes, failure_byte, allowed_bytes + ): # Note that the strings being fed in include the double quotes required # to make them JSON strings schema_obj = {"type": "string", "pattern": r".{3}"} @@ -374,7 +380,6 @@ def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_b schema_obj=schema_obj, ) - @pytest.mark.parametrize( "my_string", ["a", "bb", "ccc", "150", ",?", ".\t\n", "(){", "aA7", "\\9O"] ) @@ -673,28 +678,37 @@ def test_required_is_required(self): generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": "xyz"}, schema) check_match_failure( - bad_string=json_dumps( - {"a": 1} - ), + bad_string=json_dumps({"a": 1}), schema_obj=schema, ) def test_validated_against_additionalProperties(self): - schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b"], "additionalProperties": {"type": "integer"}} + schema = { + "type": "object", + "properties": {"a": 
{"type": "integer"}}, + "required": ["b"], + "additionalProperties": {"type": "integer"}, + } generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": 42}, schema) check_match_failure( - bad_string=json_dumps( - {"a": 1, "b": "string"} - ), + bad_string=json_dumps({"a": 1, "b": "string"}), schema_obj=schema, ) def test_false_additionalProperties_fails(self): - schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b", "c"], "additionalProperties": False} + schema = { + "type": "object", + "properties": {"a": {"type": "integer"}}, + "required": ["b", "c"], + "additionalProperties": False, + } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "Required properties not in properties but additionalProperties is False. Missing required properties: ['b', 'c']" + assert ( + ve.value.args[0] + == "Required properties not in properties but additionalProperties is False. Missing required properties: ['b', 'c']" + ) class TestSimpleArray: @@ -760,7 +774,6 @@ def test_object_list(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( ["bad_string", "good_bytes", "failure_byte", "allowed_bytes"], [ @@ -870,7 +883,6 @@ def test_good_with_items(self, min_items, max_items, target_obj): } generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -951,7 +963,6 @@ def test_bad_with_prefix_and_items( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1016,7 +1027,6 @@ def test_bad_with_prefix( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1214,13 +1224,12 @@ def test_allOf_ref(self): generate_and_check(target_obj, schema_obj) def 
test_allOf_bad_schema(self): - schema = { - "allOf" : [{ "type": "integer" }, { "type": "string" }] - } + schema = {"allOf": [{"type": "integer"}, {"type": "string"}]} with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ve.value.args[0] == "allOf with conflicting types" + class TestOneOf: @pytest.mark.parametrize("target_obj", [123, 42]) def test_oneOf_simple(self, target_obj): @@ -1235,7 +1244,6 @@ def test_oneOf_simple(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize("target_obj", [123, True]) def test_oneOf_compound(self, target_obj): schema = """{ @@ -1273,7 +1281,6 @@ def test_enum(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1293,7 +1300,6 @@ def test_bad_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1321,13 +1327,10 @@ def test_bad_prefix_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes) ("2", False), ("1", False), (True, False), - ] + ], ) def test_typed_enum_single_type(self, obj, valid): - schema_obj = { - "enum": [1, "2", True], - "type": "integer" - } + schema_obj = {"enum": [1, "2", True], "type": "integer"} if valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1344,13 +1347,10 @@ def test_typed_enum_single_type(self, obj, valid): ("2", True), ("1", False), (True, False), - ] + ], ) def test_typed_enum_multiple_types(self, obj, valid): - schema_obj = { - "enum": [1, "2", True], - "type": ["integer", "string"] - } + schema_obj = {"enum": [1, "2", True], "type": ["integer", "string"]} if valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1360,14 +1360,12 @@ def test_typed_enum_multiple_types(self, 
obj, valid): check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj) def test_invalid_typed_enum(self): - schema_obj = { - "enum": [1, "2"], - "type": "boolean" - } + schema_obj = {"enum": [1, "2"], "type": "boolean"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "No valid options found for enum with type 'boolean': [1, '2']" + class TestConst: def test_constant_int(self): # First sanity check what we're setting up @@ -1427,45 +1425,29 @@ def test_constant_precedence(self): ) def test_valid_typed_const(self): - schema_obj = { - "const": 1, - "type": "integer" - } + schema_obj = {"const": 1, "type": "integer"} target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_typed_const(self): - schema_obj = { - "const": 1, - "type": "boolean" - } + schema_obj = {"const": 1, "type": "boolean"} with pytest.raises(ValidationError): gen_json(schema=schema_obj) def test_valid_enum_const(self): - schema_obj = { - "const": 1, - "enum": [1, 2, 3] - } + schema_obj = {"const": 1, "enum": [1, 2, 3]} target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_enum_const(self): - schema_obj = { - "const": 1, - "enum": [2, 3] - } + schema_obj = {"const": 1, "enum": [2, 3]} with pytest.raises(ValidationError): gen_json(schema=schema_obj) def test_valid_typed_enum_const(self): - schema_obj = { - "const": 1, - "enum": [1, "2", 3], - "type": "integer" - } + schema_obj = {"const": 1, "enum": [1, "2", 3], "type": "integer"} target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) @@ -1473,17 +1455,13 @@ def test_valid_typed_enum_const(self): @pytest.mark.parametrize( "const", [ - "2", # right enum, wrong type - 2, # wrong enum, right type - "3", # wrong enum, wrong type - ] + "2", # right enum, wrong type + 2, # wrong enum, right type + "3", # wrong enum, 
wrong type + ], ) def test_invalid_typed_enum_const(self, const): - schema_obj = { - "const": const, - "enum": [1, "2", 3], - "type": "integer" - } + schema_obj = {"const": const, "enum": [1, "2", 3], "type": "integer"} with pytest.raises(ValidationError): gen_json(schema=schema_obj) @@ -1531,11 +1509,15 @@ def test_simple_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ - ({"a": "1"}, b'{"a": ', b'"', INTEGER_LEADING, ), + ( + {"a": "1"}, + b'{"a": ', + b'"', + INTEGER_LEADING, + ), ( {"a": 1, "b": 1.5}, b'{"a": 1, "b": 1', @@ -1555,9 +1537,7 @@ def test_simple_bad_type(self, bad_obj, good_bytes, failure_byte, allowed_bytes) schema_obj=schema_obj, ) - @pytest.mark.parametrize( - "target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}] - ) + @pytest.mark.parametrize("target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}]) def test_anyOf_additional_properties(self, target_obj): # First sanity check what we're setting up schema_obj = json.loads(self.anyOf_schema) @@ -1566,7 +1546,6 @@ def test_anyOf_additional_properties(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1608,7 +1587,6 @@ def test_properties_and_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1617,9 +1595,7 @@ def test_properties_and_additional_properties(self, target_obj, temperature): ({"a": 1, "b": 2}, b'{"', b"a", {b"m"}), ], ) - def test_combined_missing_properties( - self, bad_obj, good_bytes, failure_byte, allowed_bytes - ): + def test_combined_missing_properties(self, bad_obj, good_bytes, failure_byte, 
allowed_bytes): schema_obj = json.loads(self.combined_schema) bad_string = json_dumps(bad_obj) check_match_failure( @@ -1630,7 +1606,6 @@ def test_combined_missing_properties( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1759,7 +1734,6 @@ def test_empty_schema(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_string, good_bytes, failure_byte, allowed_bytes", [ @@ -1788,9 +1762,7 @@ def test_empty_schema(self, target_obj, temperature): ), ], ) - def test_bad_empty_schema( - self, bad_string, good_bytes, failure_byte, allowed_bytes - ): + def test_bad_empty_schema(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj = json.loads(self.empty_schema) check_match_failure( bad_string=bad_string, @@ -1806,7 +1778,12 @@ def test_bad_empty_schema( # Empty property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, + { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + "$defs": {"A": {}}, + "required": ["a"], + }, ], ) @pytest.mark.parametrize( @@ -1837,10 +1814,14 @@ def test_nested_empty_schema(self, schema_obj, target_obj, temperature): # Empty property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, + { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + "$defs": {"A": {}}, + "required": ["a"], + }, ], ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1883,7 +1864,6 @@ def test_nested_empty_schema_with_props(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - 
@pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1918,7 +1898,6 @@ def test_items(self, schema_obj): [1, 0.4, "hello", False, None, {"a": 42}, [1, 2, 3, "four"]], schema_obj ) - def test_no_items(self): schema_obj = {"type": "array", "items": False} check_match_failure( @@ -1951,7 +1930,6 @@ def test_additionalProperties(self, schema_obj): schema_obj, ) - def test_no_additionalProperties(self): schema_obj = {"type": "object", "additionalProperties": False} check_match_failure( @@ -1962,17 +1940,17 @@ def test_no_additionalProperties(self): schema_obj=schema_obj, ) + def test_ignored_keys_allowed_as_properties(): schema_obj = { "type": "object", - "properties": { - key: {"type": "string"} for key in IGNORED_KEYS - }, + "properties": {key: {"type": "string"} for key in IGNORED_KEYS}, "required": list(IGNORED_KEYS), } target_obj = {key: "value" for key in IGNORED_KEYS} generate_and_check(target_obj, schema_obj) + class TestRequiredProperties: schema_obj = { "type": "object", @@ -1981,10 +1959,19 @@ class TestRequiredProperties: "b": {"type": "number"}, "c": {"type": "boolean"}, }, - "additionalProperties": True + "additionalProperties": True, } ALL_REQUIRED = ["a", "b", "c"] - SOME_REQUIRED_SUBSETS = [[], ["a"], ["b"], ["c"], ["a", "b"], ["a", "c"], ["b", "c"], ["a", "b", "c"]] + SOME_REQUIRED_SUBSETS = [ + [], + ["a"], + ["b"], + ["c"], + ["a", "b"], + ["a", "c"], + ["b", "c"], + ["a", "b", "c"], + ] NONE_REQUIRED: list[str] = [] @pytest.mark.parametrize( @@ -1993,7 +1980,7 @@ class TestRequiredProperties: {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) def test_all_required_good(self, extra_items): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2013,7 +2000,7 @@ def test_all_required_good(self, extra_items): ({"c": True}), # Missing all ({}), - ] + ], ) def test_all_required_bad(self, bad_obj): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2028,7 +2015,7 @@ def 
test_all_required_bad(self, bad_obj): {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) @pytest.mark.parametrize( "required", @@ -2066,7 +2053,7 @@ def test_some_required_bad(self, required): {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) @pytest.mark.parametrize( "target_obj", @@ -2079,55 +2066,48 @@ def test_some_required_bad(self, required): {"a": "hello", "c": True}, {"b": 42, "c": True}, {"a": "hello", "b": 42, "c": True}, - ] + ], ) def test_none_required(self, target_obj, extra_items): schema_obj = {**self.schema_obj, "required": self.NONE_REQUIRED} generate_and_check({**target_obj, **extra_items}, schema_obj) + class TestRequiredPropertiesScaling: - @pytest.mark.parametrize( - "num_properties", - [1, 2, 3, 4, 5, 10, 20, 50, 100] - ) + @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) def test_many_optional_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": { - f"prop_{i}": {"type": "string"} for i in range(num_properties) - }, - "required": [] # Empty should be worst-case scenario + "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, + "required": [], # Empty should be worst-case scenario } from guidance.library._json import GenJson + genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() cache_info = genjson._join.__wrapped__.cache_info() # Theoretical number of cache misses under the current implementation - expected_misses = 2*num_properties - 1 - MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? + expected_misses = 2 * num_properties - 1 + MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? 
assert 0 < cache_info.misses <= expected_misses + MISSES_MAGIC_NUMBER # NOTE: that if the cache maxsize is hit, the number of misses will be more than expected # Theoretical number of total calls under the current implementation - expected_calls = num_properties*(num_properties - 1) // 2 - CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? + expected_calls = num_properties * (num_properties - 1) // 2 + CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? assert 0 < cache_info.hits + cache_info.misses <= expected_calls + CALLS_MAGIC_NUMBER - @pytest.mark.parametrize( - "num_properties", - [1, 2, 3, 4, 5, 10, 20, 50, 100] - ) + @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) def test_all_required_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": { - f"prop_{i}": {"type": "string"} for i in range(num_properties) - }, - "required": [f"prop_{i}" for i in range(num_properties)] + "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, + "required": [f"prop_{i}" for i in range(num_properties)], } from guidance.library._json import GenJson + genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() @@ -2155,7 +2135,7 @@ class TestBooleanSchema: {"a": [1, 2, 3]}, {"a": {"b": 1}}, False, - True + True, ], ) def test_true_schema(self, target_obj): @@ -2168,13 +2148,14 @@ def test_true_schema(self, target_obj): [ False, {"type": "object", "properties": {"a": False}, "required": ["a"]}, - ] + ], ) def test_false_schema(self, schema_obj): with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "No valid JSON can be generated from a schema of `False`" + class TestWhitespace: seps = [ (", ", ": "), @@ -2192,7 +2173,7 @@ class TestWhitespace: ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": 
{"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ] + ], ) @pytest.mark.parametrize( "separators", @@ -2218,7 +2199,7 @@ def test_separators(self, separators, schema, obj): ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ] + ], ) @pytest.mark.parametrize( "separators", diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py index 49f035283..f2248129d 100644 --- a/tests/unit/library/json/test_refs.py +++ b/tests/unit/library/json/test_refs.py @@ -1,9 +1,9 @@ -from .utils import check_match_failure, generate_and_check +from json import dumps as json_dumps import pytest from jsonschema import ValidationError, validate -from json import dumps as json_dumps +from .utils import check_match_failure, generate_and_check class TestRefs: @@ -439,9 +439,15 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( # invalid on inner field ({"bar": "a", "foo": {"bar": 1}}, False), # invalid on outer field - ({ "bar": 1, "foo": {"bar": "a"}}, False), + ({"bar": 1, "foo": {"bar": "a"}}, False), # valid on both fields - ({"bar": "a", "foo": {"bar": "a"}, }, True), + ( + { + "bar": "a", + "foo": {"bar": "a"}, + }, + True, + ), ], ) def test_refs_with_relative_uris_and_defs(self, test_object, valid): @@ -975,4 +981,4 @@ def test_empty_tokens_in_ref_json_pointer(self, test_object, valid): else: with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) \ No newline at end of file + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) diff --git a/tests/unit/library/json/test_string_format.py b/tests/unit/library/json/test_string_format.py index 3259274bb..7b2dd9bdc 100644 --- a/tests/unit/library/json/test_string_format.py +++ 
b/tests/unit/library/json/test_string_format.py @@ -1,10 +1,10 @@ """Adapted from https://github.com/json-schema-org/JSON-Schema-Test-Suite/tree/9fc880bfb6d8ccd093bc82431f17d13681ffae8e/tests/draft2020-12/optional/format""" -import pytest import json -from .utils import generate_and_check -from .utils import check_match_failure +import pytest + +from .utils import check_match_failure, generate_and_check class TestDate: @@ -45,17 +45,35 @@ def test_good(self, target_str): "bad_str", [ '"2020-01-32"', # a invalid date string with 32 days in January - pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 29 days in February (normal) - pytest.param('"2020-02-30"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 30 days in February (leap) + pytest.param( + '"2021-02-29"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 29 days in February (normal) + pytest.param( + '"2020-02-30"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 30 days in February (leap) '"2020-03-32"', # a invalid date string with 32 days in March - pytest.param('"2020-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in April + pytest.param( + '"2020-04-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in April '"2020-05-32"', # a invalid date string with 32 days in May - pytest.param('"2020-06-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in June + pytest.param( + '"2020-06-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in June '"2020-07-32"', # a invalid date string with 
32 days in July '"2020-08-32"', # a invalid date string with 32 days in August - pytest.param('"2020-09-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in September + pytest.param( + '"2020-09-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in September '"2020-10-32"', # a invalid date string with 32 days in October - pytest.param('"2020-11-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in November + pytest.param( + '"2020-11-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in November '"2020-12-32"', # a invalid date string with 32 days in December '"2020-13-01"', # a invalid date string with invalid month '"06/19/1963"', # an invalid date string @@ -63,8 +81,13 @@ def test_good(self, target_str): '"1998-1-20"', # non-padded month dates are not valid '"1998-01-1"', # non-padded day dates are not valid '"1998-13-01"', # invalid month - pytest.param('"1998-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # invalid month-day combination - pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard")), # 2021 is not a leap year + pytest.param( + '"1998-04-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # invalid month-day combination + pytest.param( + '"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard") + ), # 2021 is not a leap year '"1963-06-1\\u09ea"', # invalid non-ASCII '৪' (a Bengali 4) '"20230328"', # ISO8601 / non-RFC3339: YYYYMMDD without dashes (2023-03-28) '"2023-W01"', # ISO8601 / non-RFC3339: week number implicit day of week (2023-01-02) @@ -138,6 +161,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, 
schema_obj=schema_obj) + @pytest.mark.xfail(reason="idn-hostname format not implemented") class TestIdnHostname: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"idn-hostname"}' @@ -301,6 +325,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) + @pytest.mark.xfail(reason="iri-reference format is not yet implemented") class TestIriReference: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"iri-reference"}' @@ -490,20 +515,40 @@ def test_good(self, target_str): '"008:030:006Z"', # invalid time string with extra leading zeros '"8:3:6Z"', # invalid time string with no leading zero for single digit '"8:0030:6Z"', # hour, minute, second must be two digits - pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong hour) - pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong minute) - pytest.param('"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong hour) - pytest.param('"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong minute) - pytest.param('"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong hour) - pytest.param('"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong minute) - pytest.param('"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong hour) - pytest.param('"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong minute) + pytest.param( + '"22:59:60Z"', 
marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, Zulu (wrong hour) + pytest.param( + '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, Zulu (wrong minute) + pytest.param( + '"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, zero time-offset (wrong hour) + pytest.param( + '"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, zero time-offset (wrong minute) + pytest.param( + '"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, positive time-offset (wrong hour) + pytest.param( + '"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, positive time-offset (wrong minute) + pytest.param( + '"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, negative time-offset (wrong hour) + pytest.param( + '"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, negative time-offset (wrong minute) '"08:30:06-8:000"', # hour, minute in time-offset must be two digits '"24:00:00Z"', # an invalid time string with invalid hour '"00:60:00Z"', # an invalid time string with invalid minute '"00:00:61Z"', # an invalid time string with invalid second - pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong hour) - pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong minute) + pytest.param( + '"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid time string with invalid leap second (wrong hour) + pytest.param( + '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid time string 
with invalid leap second (wrong minute) '"01:02:03+24:00"', # an invalid time string with invalid time numoffset hour '"01:02:03+00:60"', # an invalid time string with invalid time numoffset minute '"01:02:03Z+00:30"', # an invalid time string with invalid time with both Z and numoffset @@ -539,11 +584,23 @@ class TestIpv6: '"::42:ff:1"', # leading colons is valid '"d6::"', # trailing colons is valid '"1:d6::42"', # single set of double colons in the middle is valid - pytest.param('"1::d6:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with the ipv4 section as decimal octets - pytest.param('"1:2::192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with double colons between the sections - pytest.param('"::ffff:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with leading double colons (ipv4-mapped ipv6 address) + pytest.param( + '"1::d6:192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with the ipv4 section as decimal octets + pytest.param( + '"1:2::192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with double colons between the sections + pytest.param( + '"::ffff:192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with leading double colons (ipv4-mapped ipv6 address) '"1:2:3:4:5:6:7:8"', # 8 octets - pytest.param('"1000:1000:1000:1000:1000:1000:255.255.255.255"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # a long valid ipv6 + pytest.param( + '"1000:1000:1000:1000:1000:1000:255.255.255.255"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # a long valid ipv6 ], ) def test_good(self, target_str): @@ -711,11 +768,22 @@ class TestEmail: '"te~st@example.com"', # tilde in local part is valid 
'"~test@example.com"', # tilde before local part is valid '"test~@example.com"', # tilde after local part is valid - pytest.param('"\\"joe bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a space in the local part is valid - pytest.param('"\\"joe..bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a double dot in the local part is valid - pytest.param('"\\"joe@bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a @ in the local part is valid + pytest.param( + '"\\"joe bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a space in the local part is valid + pytest.param( + '"\\"joe..bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a double dot in the local part is valid + pytest.param( + '"\\"joe@bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a @ in the local part is valid '"joe.bloggs@[127.0.0.1]"', # an IPv4-address-literal after the @ is valid - pytest.param('"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard")), # an IPv6-address-literal after the @ is valid + pytest.param( + '"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard") + ), # an IPv6-address-literal after the @ is valid '"te.s.t@example.com"', # two separated dots inside local part are valid '"riedgar+guidance@example.com"', # plus sign in local part is valid ], @@ -861,9 +929,16 @@ def test_good(self, target_str): "bad_str", [ '"1998-12-31T23:59:61Z"', # an invalid date-time past leap second, UTC - pytest.param('"1998-12-31T23:58:60Z"', 
marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong minute, UTC - pytest.param('"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong hour, UTC - pytest.param('"1990-02-31T15:59:59.123-08:00"', marks=pytest.mark.xfail(reason="valid days not yet tied to month")), # an invalid day in date-time string + pytest.param( + '"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid date-time with leap second on a wrong minute, UTC + pytest.param( + '"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid date-time with leap second on a wrong hour, UTC + pytest.param( + '"1990-02-31T15:59:59.123-08:00"', + marks=pytest.mark.xfail(reason="valid days not yet tied to month"), + ), # an invalid day in date-time string '"1990-12-31T15:59:59-24:00"', # an invalid offset in date-time string '"1963-06-19T08:30:06.28123+01:00Z"', # an invalid closing Z after time-zone offset '"06/19/1963 08:30:06 PST"', # an invalid date-time string @@ -878,6 +953,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) + @pytest.mark.xfail(reason="regex format not implemented") class TestRegex: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"regex"}' diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py index 920571d27..ffbbe3b5f 100644 --- a/tests/unit/library/json/utils.py +++ b/tests/unit/library/json/utils.py @@ -1,19 +1,23 @@ -from typing import Union, Optional, Any, Set -from guidance import json as gen_json -from guidance.library._json import JSONSchema - -from ....utils import check_match_failure as _check_match_failure, check_run_with_temperature, generate_and_check as _generate_and_check +import json +from functools import partial +from 
json import dumps as json_dumps +from json import loads as json_loads +from typing import Any, Optional, Set, Union from jsonschema import validate +from guidance import json as gen_json +from guidance.library._json import JSONSchema -import json -from functools import partial -from json import dumps as json_dumps, loads as json_loads +from ....utils import check_match_failure as _check_match_failure +from ....utils import check_run_with_temperature +from ....utils import generate_and_check as _generate_and_check def generate_and_check( - target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None + target_obj: Any, + schema_obj: Union[str, JSONSchema], + desired_temperature: Optional[float] = None, ): if isinstance(schema_obj, str): schema_obj = json_loads(schema_obj) @@ -26,9 +30,7 @@ def generate_and_check( # Now test that the grammar can recognize and generate prepared_json # We partial in the grammar_callable if desired_temperature is not None: - grammar_callable = partial( - gen_json, schema=schema_obj, temperature=desired_temperature - ) + grammar_callable = partial(gen_json, schema=schema_obj, temperature=desired_temperature) else: grammar_callable = partial(gen_json, schema=schema_obj) @@ -55,4 +57,4 @@ def check_match_failure( failure_byte=failure_byte, allowed_bytes=allowed_bytes, grammar=grammar, - ) \ No newline at end of file + ) From 8a7dfb6ed51e7c2c428ca7fae23c5e0db0ae9150 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 09:50:10 -0700 Subject: [PATCH 38/70] drop unnecessary typing import from test --- tests/unit/library/json/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py index ffbbe3b5f..5498d718c 100644 --- a/tests/unit/library/json/utils.py +++ b/tests/unit/library/json/utils.py @@ -2,7 +2,7 @@ from functools import partial from json import dumps as json_dumps from json import loads as 
json_loads -from typing import Any, Optional, Set, Union +from typing import Any, Optional, Union from jsonschema import validate @@ -46,7 +46,7 @@ def check_match_failure( bad_string: str, good_bytes: Optional[bytes] = None, failure_byte: Optional[bytes] = None, - allowed_bytes: Optional[Set[bytes]] = None, + allowed_bytes: Optional[set[bytes]] = None, schema_obj: Union[str, JSONSchema], ): grammar = gen_json(schema=schema_obj) From ce18dc469b351fde831fe68be8e5767d1a48bdb9 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 09:51:27 -0700 Subject: [PATCH 39/70] drop some more unnecessary imports --- guidance/library/_json.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index cb6ea3aff..a11a469a6 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -4,7 +4,6 @@ from typing import ( Any, Callable, - Dict, Mapping, Optional, Sequence, @@ -32,10 +31,10 @@ def urijoin(base: str, uri: str) -> str: raise from .._guidance import guidance -from ..library import char_range, gen, one_or_more, optional, sequence +from ..library import optional, sequence from ..library._regex_utils import rx_int_range, rx_float_range -from .._grammar import GrammarFunction, select, capture, with_temperature, Not, And, quote_regex +from .._grammar import GrammarFunction, select, with_temperature, Not, And, quote_regex from ._pydantic import pydantic_to_json_schema from ._subgrammar import as_regular_grammar, lexeme, subgrammar From 7af4401d9cf8ce7d3de08ee64690079ba50f47b1 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 12:43:20 -0700 Subject: [PATCH 40/70] raise UnsatisfiableSchemaError whenever allOf leads to conflicting constraints --- guidance/library/_json.py | 128 ++++++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 54 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index a11a469a6..5afdb1bd1 100644 --- 
a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -16,19 +16,7 @@ import referencing from collections import defaultdict import urllib.parse - -def urijoin(base: str, uri: str) -> str: - # Special case for fragment-only URIs - if uri.startswith("#"): - return f"{base}{uri}" - return urllib.parse.urljoin(base, uri) - -try: - import jsonschema - import pydantic -except ImportError: - if TYPE_CHECKING: - raise +import functools from .._guidance import guidance from ..library import optional, sequence @@ -38,12 +26,14 @@ def urijoin(base: str, uri: str) -> str: from ._pydantic import pydantic_to_json_schema from ._subgrammar import as_regular_grammar, lexeme, subgrammar -JSONSchema = Union[bool, dict[str, Any]] +try: + import jsonschema + import pydantic +except ImportError: + if TYPE_CHECKING: + raise -class Unset(Enum): - # https://peps.python.org/pep-0484/#support-for-singleton-types-in-unions - token = 0 -_unset = Unset.token +JSONSchema = Union[bool, dict[str, Any]] DRAFT202012_RESERVED_KEYWORDS = { # Anchors and References @@ -388,6 +378,14 @@ class ObjectKeywords(str, Enum): "unknown": r"(?s:.*)", } + +def urijoin(base: str, uri: str) -> str: + # Special case for fragment-only URIs + if uri.startswith("#"): + return f"{base}{uri}" + return urllib.parse.urljoin(base, uri) + + def _get_format_pattern(format: str) -> str: try: pattern = FORMAT_PATTERNS[format] @@ -413,6 +411,9 @@ def get_sibling_keys(node: Mapping[str, Any], key: str) -> set[str]: return set(node.keys()) & VALID_KEYS - set(IGNORED_KEYS) - {key} +class UnsatisfiableSchemaError(ValueError): + pass + class GenJson: item_separator = ", " key_separator = ": " @@ -728,22 +729,17 @@ def allOf( parent_schema: JSONSchema, base_uri: str, ): - type = set(JSONType) + types: list[set[JSONType]] = [] properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) required: dict[str, None] = dict() # use a dict for ordered-set behavior additional_properties_list: list[tuple[JSONSchema, set[str]]] 
= [] prefix_items: defaultdict[int, list[JSONSchema]] = defaultdict(list) items_list: list[tuple[JSONSchema, set[int]]] = [] other_data: dict[str, Any] = {} - enum: Optional[list[Any]] = None - const: Union[Unset, Any] = _unset + enums: list[Sequence[Any]] = [] + consts: list[Any] = [] def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri: str): - nonlocal type - nonlocal required - nonlocal const - nonlocal enum - if key == Keyword.REF: ref = cast(str, value) abspath = urijoin(base_uri, ref) @@ -751,37 +747,19 @@ def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri add_schema(resolved.contents, base_uri=resolved.resolver._base_uri) elif key == Keyword.CONST: - if const is not _unset and const != value: - raise ValueError(f"allOf with multiple conflicting const values: {const!r} and {value!r}") - const = value + consts.append(value) elif key == Keyword.ENUM: value = cast(list[Any], value) - if enum is not None: - try: - enum = list(set(enum) & set(value)) - except TypeError: - # Check on equality, not on hash - # Yes, this is O(n^2). - # Hope the items were unique. 
- # ¯\_(ツ)_/¯ - enum = [a for a in enum for b in value if a == b] - else: - enum = value + enums.append(value) elif key == Keyword.TYPE: - value = cast(Union[str, list[str]], value) + value = cast(Union[str, Sequence[str]], value) if isinstance(value, str): value_set = {value} else: value_set = set(value) - if JSONType.NUMBER in value_set: - # Number implies integer - value_set.add(JSONType.INTEGER) - type &= value_set - # Throw an error early if we have conflicting types - if not type: - raise ValueError("allOf with conflicting types") + types.append(value_set) elif key == Keyword.ALLOF: value = cast(Sequence[JSONSchema], value) @@ -857,7 +835,7 @@ def add_schema(schema: JSONSchema, base_uri: str): if schema is True: return if schema is False: - raise ValueError("allOf contains a False schema") + raise UnsatisfiableSchemaError("allOf contains a 'false' schema") # Validate the schema's keys (we have only validated the parent schema's keys so far) # TODO: This will make us validate the parent twice... 
should probably be refactored @@ -876,9 +854,7 @@ def add_schema(schema: JSONSchema, base_uri: str): add_schema(parent_schema, base_uri) - combined_schema: dict[str, Any] = { - Keyword.TYPE: list(type), - } + combined_schema: dict[str, Any] = {} # Post-process additional_properties to make sure we apply the additional properties of one # schema to the properties of another schema @@ -898,13 +874,16 @@ def add_schema(schema: JSONSchema, base_uri: str): combined_schema[ObjectKeywords.PROPERTIES][name] = schemas[0] else: combined_schema[ObjectKeywords.PROPERTIES][name] = {"allOf": schemas} + if required: combined_schema[ObjectKeywords.REQUIRED] = list(required.keys()) + if additional_properties_list: if len(additional_properties_list) == 1: combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES], _ = additional_properties_list[0] else: combined_schema[ObjectKeywords.ADDITIONAL_PROPERTIES] = {"allOf": [schema for schema, _ in additional_properties_list]} + if prefix_items: combined_schema[ArrayKeywords.PREFIX_ITEMS] = [] for i in range(len(prefix_items)): @@ -913,16 +892,57 @@ def add_schema(schema: JSONSchema, base_uri: str): combined_schema[ArrayKeywords.PREFIX_ITEMS].append(schemas[0]) else: combined_schema[ArrayKeywords.PREFIX_ITEMS].append({"allOf": schemas}) + if items_list: if len(items_list) == 1: combined_schema[ArrayKeywords.ITEMS], _ = items_list[0] else: combined_schema[ArrayKeywords.ITEMS] = {"allOf": [schema for schema, _ in items_list]} - if enum is not None: + + if enums: + if len(enums) == 1: + enum = enums[0] + else: + def reduce_enums(enum_a, enum_b): + try: + enum = list(set(enum_a) & set(enum_b)) + except TypeError: + # Check on equality, not on hash + # Yes, this is O(n^2). + # Hope the items were unique. 
+ # ¯\_(ツ)_/¯ + enum = [a for a in enum_a for b in enum_b if a == b] + return enum + enum = functools.reduce(reduce_enums, enums[1:], enums[0]) + if not enum: + raise UnsatisfiableSchemaError(f"allOf has enums with no common values: {enums}") combined_schema[Keyword.ENUM] = enum - if const is not _unset: + + if consts: + const, *rest = consts + for c in rest: + if c != const: + raise UnsatisfiableSchemaError(f"allOf has consts with different values: {consts}") combined_schema[Keyword.CONST] = const + if types: + if len(types) == 1: + type = list(types[0]) + else: + def reduce_types(type_a: set[JSONType], type_b: set[JSONType]) -> set[JSONType]: + common_types = type_a & type_b + # Integer is a "subtype" of number, so ensure we keep integer if we have "number" in one and "integer" in the other + if JSONType.INTEGER not in common_types and ( + (JSONType.NUMBER in type_a and JSONType.INTEGER in type_b) or + (JSONType.INTEGER in type_a and JSONType.NUMBER in type_b) + ): + common_types.add(JSONType.INTEGER) + return common_types + type = list(functools.reduce(reduce_types, types[1:], types[0])) + if not type: + raise UnsatisfiableSchemaError(f"allOf has conflicting types: {types}") + combined_schema[Keyword.TYPE] = type + assert not set(combined_schema) & set(other_data) combined_schema.update(other_data) From 487282af9fd395bbfd4822127289cf3eecafa1e6 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 12:47:48 -0700 Subject: [PATCH 41/70] raise UnsatisfiableSchemaError if schema is literal false --- guidance/library/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 5afdb1bd1..927dea5be 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1058,7 +1058,7 @@ def json( if json_schema is True: json_schema = {} elif json_schema is False: - raise ValueError("No valid JSON can be generated from a schema of `False`") + raise UnsatisfiableSchemaError("No 
valid JSON can be generated from a schema of `false`") if json_schema == {}: return lm + self.any() From 6aa856722964a226fe7f8403f5df917acd13f885 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 13:26:41 -0700 Subject: [PATCH 42/70] catch UnsatisfiableSchemaError if raised when building grammar for object property; add tests that assert informative tracebacks --- guidance/library/_json.py | 44 ++++++++++++++++++--------- tests/unit/library/json/test_allOf.py | 4 +-- tests/unit/library/json/test_json.py | 23 +++++++------- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 927dea5be..031b544c3 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -551,17 +551,32 @@ def object( required: Sequence[str], base_uri: str, ): - # "required" keys will be validated against "properties" if they're present, otherwise against "additionalProperties". - # If "additionalProperties" is False, then required keys must be in "properties". - if any(k not in properties for k in required) and additional_properties is False: - raise ValueError( - f"Required properties not in properties but additionalProperties is False." 
- f" Missing required properties: {list(r for r in required if r not in properties)}" - ) + illegal_keys = set() + property_grammars: dict[str, GrammarFunction] = {} + for name, schema in properties.items(): + try: + property_grammars[name] = self.json(json_schema=schema, base_uri=base_uri) + except UnsatisfiableSchemaError as e: + # We get here if the schema is a literal False or is otherwise determined to be unsatisfiable + if name in required: + raise UnsatisfiableSchemaError(f"Required property {name!r} is unsatisfiable") from e + illegal_keys.add(name) + + additional_properties_grammar: Optional[GrammarFunction] = None + try: + additional_properties_grammar = self.json(json_schema=additional_properties, base_uri=base_uri) + except UnsatisfiableSchemaError as e: + if any(k not in properties for k in required): + # "required" keys will be validated against "properties" if they're present, otherwise against "additionalProperties". + # If "additionalProperties" is unsatisfiable, then required keys must be in "properties". + raise UnsatisfiableSchemaError( + f"Required properties not in properties but additionalProperties is unsatisfiable." 
+ f" Missing required properties: {list(r for r in required if r not in properties)}" + ) from e keys: list[str] = [] required_items: list[bool] = [] - grammars: list[GrammarFunction] = [] + item_grammars: list[GrammarFunction] = [] # First iterate over the properties in order, then iterate over any missing required keys, using additional_properties as the schema for name in (*properties, *(r for r in required if r not in properties)): # Use json_dumps to properly quote / escape the key @@ -570,7 +585,7 @@ def object( # Identify if the key is required required_items.append(name in required) # Build the grammar we'll use for this property - grammars.append(f'{key}{self.key_separator}' + self.json(json_schema=properties.get(name, additional_properties), base_uri=base_uri)) + item_grammars.append(f'{key}{self.key_separator}' + property_grammars.get(name, additional_properties_grammar)) if additional_properties is not False: # Key for additionalProperties is a json string, but we need to disallow any properties that are already defined @@ -586,13 +601,14 @@ def object( else: additional_key_grammar = self.string() - additional_item_grammar = additional_key_grammar + self.key_separator + self.json(json_schema=additional_properties, base_uri=base_uri) - additional_items_grammar = sequence(additional_item_grammar + self.item_separator) + additional_item_grammar - grammars.append(additional_items_grammar) - required_items.append(False) + if additional_properties_grammar is not None: + additional_item_grammar = additional_key_grammar + self.key_separator + additional_properties_grammar + additional_items_grammar = sequence(additional_item_grammar + self.item_separator) + additional_item_grammar + item_grammars.append(additional_items_grammar) + required_items.append(False) return lm + "{" + self._join( - elements = tuple(grammars), + elements = tuple(item_grammars), required = tuple(required_items), ) + "}" diff --git a/tests/unit/library/json/test_allOf.py 
b/tests/unit/library/json/test_allOf.py index 1a388d2ce..659171463 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -176,7 +176,7 @@ def test_allOf_with_boolean_schemas_some_false(self, test_object, valid): validate(instance=test_object, schema=schema) with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "allOf contains a False schema" + assert ve.value.args[0] == "allOf contains a 'false' schema" @pytest.mark.parametrize( ["test_object", "valid"], @@ -198,7 +198,7 @@ def test_allOf_with_boolean_schemas_all_false(self, test_object, valid): validate(instance=test_object, schema=schema) with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "allOf contains a False schema" + assert ve.value.args[0] == "allOf contains a 'false' schema" @pytest.mark.parametrize( ["test_object", "valid"], diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index f3602fddd..724369a3e 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -707,8 +707,9 @@ def test_false_additionalProperties_fails(self): _ = gen_json(schema=schema) assert ( ve.value.args[0] - == "Required properties not in properties but additionalProperties is False. Missing required properties: ['b', 'c']" + == "Required properties not in properties but additionalProperties is unsatisfiable. 
Missing required properties: ['b', 'c']" ) + assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" class TestSimpleArray: @@ -1227,7 +1228,7 @@ def test_allOf_bad_schema(self): schema = {"allOf": [{"type": "integer"}, {"type": "string"}]} with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "allOf with conflicting types" + assert ve.value.args[0] == "allOf has conflicting types: [{'integer'}, {'string'}]" class TestOneOf: @@ -2143,18 +2144,18 @@ def test_true_schema(self, target_obj): schema_obj = True generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize( - "schema_obj", - [ - False, - {"type": "object", "properties": {"a": False}, "required": ["a"]}, - ], - ) - def test_false_schema(self, schema_obj): + def test_false_schema(self): + schema_obj = False with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) - assert ve.value.args[0] == "No valid JSON can be generated from a schema of `False`" + assert ve.value.args[0] == "No valid JSON can be generated from a schema of `false`" + def test_false_required_property(self): + schema_obj = {"type": "object", "properties": {"a": False}, "required": ["a"]} + with pytest.raises(ValueError) as ve: + gen_json(schema=schema_obj) + assert ve.value.args[0] == "Required property 'a' is unsatisfiable" + assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" class TestWhitespace: seps = [ From 372bd3df3a50da14b096dc2d83560eec800c4642 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 13:51:18 -0700 Subject: [PATCH 43/70] raise more UnsatisfiableSchemaErrors if min > max for string, number, array --- guidance/library/_json.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 031b544c3..593b6aa56 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -411,6 +411,16 @@ 
def get_sibling_keys(node: Mapping[str, Any], key: str) -> set[str]: return set(node.keys()) & VALID_KEYS - set(IGNORED_KEYS) - {key} +def check_number_bounds(minimum: Union[float, int, None], maximum: Union[float, int, None], exclusiveMinimum: bool, exclusiveMaximum: bool): + if minimum is not None and maximum is not None: + if minimum > maximum: + raise UnsatisfiableSchemaError(f"Number minimum ({minimum}) is greater than maximum ({maximum})") + if minimum == maximum and (exclusiveMinimum or exclusiveMaximum): + minimum_repr = f"exclusiveMinimum {minimum}" if exclusiveMinimum else f"minimum {minimum}" + maximum_repr = f"exclusiveMaximum {maximum}" if exclusiveMaximum else f"maximum {maximum}" + raise UnsatisfiableSchemaError(f"Number {minimum_repr} is equal to {maximum_repr}") + + class UnsatisfiableSchemaError(ValueError): pass @@ -471,6 +481,8 @@ def root(self, lm): @classmethod @guidance(stateless=True) def integer(cls, lm, minimum: Union[float, int, None] = None, maximum: Union[float, int, None] = None, exclusiveMinimum: bool = False, exclusiveMaximum: bool = False): + check_number_bounds(minimum, maximum, exclusiveMinimum, exclusiveMaximum) + if minimum is not None: if exclusiveMinimum: if minimum != int(minimum): @@ -496,6 +508,8 @@ def integer(cls, lm, minimum: Union[float, int, None] = None, maximum: Union[flo @classmethod @guidance(stateless=True) def number(cls, lm, minimum: Optional[float] = None, maximum: Optional[float] = None, exclusiveMinimum: bool = False, exclusiveMaximum: bool = False): + check_number_bounds(minimum, maximum, exclusiveMinimum, exclusiveMaximum) + return lm + lexeme( rx_float_range( minimum, maximum, @@ -517,6 +531,9 @@ def string( regex: Union[str, None] = None, format: Union[str, None] = None, ): + if min_length is not None and max_length is not None and min_length > max_length: + raise UnsatisfiableSchemaError(f"String minLength ({min_length}) is greater than maxLength ({max_length})") + if (regex is not None or format is not 
None) and (min_length > 0 or max_length is not None): raise ValueError( "If a pattern or format is specified for a JSON string, minLength and maxLength must be left unspecified." @@ -651,6 +668,9 @@ def array( max_items: Optional[int], base_uri: str, ): + if max_items is not None and min_items > max_items: + raise UnsatisfiableSchemaError(f"minItems ({min_items}) is greater than maxItems ({max_items})") + if len(prefix_items_schema) < min_items and item_schema is False: raise ValueError( f"PrefixItems has too few elements ({len(prefix_items_schema)}) to" From 7149e87fcb0380a34253dd413f36ef2317eebda0 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 13:54:35 -0700 Subject: [PATCH 44/70] add illegal keys to not expression of additonal_key_grammar --- guidance/library/_json.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 593b6aa56..70c144a14 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -577,7 +577,9 @@ def object( # We get here if the schema is a literal False or is otherwise determined to be unsatisfiable if name in required: raise UnsatisfiableSchemaError(f"Required property {name!r} is unsatisfiable") from e - illegal_keys.add(name) + # Use json_dumps to properly quote / escape the key + key = json_dumps(name) + illegal_keys.add(key) additional_properties_grammar: Optional[GrammarFunction] = None try: @@ -611,7 +613,7 @@ def object( additional_key_grammar = as_regular_grammar( And([ lexeme(r'"([^"\\]|\\["\\/bfnrt]|\\u[0-9a-fA-F]{4})*"'), - Not(lexeme('|'.join(map(quote_regex, keys)))), + Not(lexeme('|'.join(map(quote_regex, (*keys, *illegal_keys))))), ]), lexeme = True, ) @@ -677,9 +679,6 @@ def array( f" satisfy minItems ({min_items}) but no extra items were allowed" ) - if max_items is not None and max_items < min_items: - raise ValueError(f"maxItems ({max_items}) can't be less than minItems ({min_items})") - required_items = [] 
optional_items = [] From 614d29f55f789bc37c9e4878069883b20f3c1f78 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 14:10:09 -0700 Subject: [PATCH 45/70] mypy --- guidance/library/_json.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 70c144a14..a515f9276 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -604,7 +604,7 @@ def object( # Identify if the key is required required_items.append(name in required) # Build the grammar we'll use for this property - item_grammars.append(f'{key}{self.key_separator}' + property_grammars.get(name, additional_properties_grammar)) + item_grammars.append(f'{key}{self.key_separator}' + property_grammars.get(name, cast(GrammarFunction, additional_properties_grammar))) if additional_properties is not False: # Key for additionalProperties is a json string, but we need to disallow any properties that are already defined @@ -764,7 +764,7 @@ def allOf( parent_schema: JSONSchema, base_uri: str, ): - types: list[set[JSONType]] = [] + types: list[set[str]] = [] properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) required: dict[str, None] = dict() # use a dict for ordered-set behavior additional_properties_list: list[tuple[JSONSchema, set[str]]] = [] @@ -948,7 +948,7 @@ def reduce_enums(enum_a, enum_b): # ¯\_(ツ)_/¯ enum = [a for a in enum_a for b in enum_b if a == b] return enum - enum = functools.reduce(reduce_enums, enums[1:], enums[0]) + enum = functools.reduce(reduce_enums, enums) if not enum: raise UnsatisfiableSchemaError(f"allOf has enums with no common values: {enums}") combined_schema[Keyword.ENUM] = enum @@ -964,7 +964,7 @@ def reduce_enums(enum_a, enum_b): if len(types) == 1: type = list(types[0]) else: - def reduce_types(type_a: set[JSONType], type_b: set[JSONType]) -> set[JSONType]: + def reduce_types(type_a: set[str], type_b: set[str]) -> set[str]: common_types = type_a & type_b # 
Integer is a "subtype" of number, so ensure we keep integer if we have "number" in one and "integer" in the other if JSONType.INTEGER not in common_types and ( @@ -973,7 +973,7 @@ def reduce_types(type_a: set[JSONType], type_b: set[JSONType]) -> set[JSONType]: ): common_types.add(JSONType.INTEGER) return common_types - type = list(functools.reduce(reduce_types, types[1:], types[0])) + type = list(functools.reduce(reduce_types, types)) # type: ignore[arg-type] if not type: raise UnsatisfiableSchemaError(f"allOf has conflicting types: {types}") combined_schema[Keyword.TYPE] = type From 3b802c8d4f59160962d6ecc43d8a52ab00b78ca5 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 15:08:36 -0700 Subject: [PATCH 46/70] unsatisfiable items --- guidance/library/_json.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index a515f9276..78d813fbd 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -673,11 +673,15 @@ def array( if max_items is not None and min_items > max_items: raise UnsatisfiableSchemaError(f"minItems ({min_items}) is greater than maxItems ({max_items})") - if len(prefix_items_schema) < min_items and item_schema is False: - raise ValueError( - f"PrefixItems has too few elements ({len(prefix_items_schema)}) to" - f" satisfy minItems ({min_items}) but no extra items were allowed" - ) + items_grammar: Optional[GrammarFunction] = None + try: + items_grammar = self.json(json_schema=item_schema, base_uri=base_uri) + except UnsatisfiableSchemaError as e: + if len(prefix_items_schema) < min_items: + raise UnsatisfiableSchemaError( + f"prefixItems has too few elements ({len(prefix_items_schema)}) to satisfy minItems ({min_items})" + f" but item schema is unsatisfiable" + ) from e required_items = [] optional_items = [] @@ -686,24 +690,29 @@ def array( n_to_add = max(len(prefix_items_schema), min_items) if max_items is None 
else max_items for i in range(n_to_add): if i < len(prefix_items_schema): - schema = prefix_items_schema[i] - elif item_schema is not False: - schema = item_schema + try: + item = self.json(json_schema=prefix_items_schema[i], base_uri=base_uri) + except UnsatisfiableSchemaError as e: + # i corresponds to the number of items we've already satisfied + if i < min_items: + raise UnsatisfiableSchemaError(f"prefixItems[{i}] is unsatisfiable but min_items is {min_items}") from e + # Having an unsatisfiable prefix item is fine if we've already satisfied min_items, but this effectively sets max_items to i + max_items = i + break + elif items_grammar is not None: + item = items_grammar else: assert i >= min_items break - item = self.json(json_schema=schema, base_uri=base_uri) - if i < min_items: required_items.append(item) else: optional_items.append(item) - if max_items is None and item_schema is not False: + if max_items is None and items_grammar is not None: # Add an infinite tail of items - item = self.json(json_schema=item_schema, base_uri=base_uri) - optional_items.append(item + sequence(self.item_separator + item)) + optional_items.append(items_grammar + sequence(self.item_separator + items_grammar)) lm += "[" From 57c7d421be26376fc005b239ea3b01d9e42a7ba9 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 15:39:53 -0700 Subject: [PATCH 47/70] drop xfail --- tests/unit/library/json/test_allOf.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py index 659171463..4cc039d9d 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -419,21 +419,14 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): {"additionalProperties": {"type": "string"}}, ], } - try: - if valid: + if valid: + validate(instance=test_object, schema=schema) + generate_and_check(test_object, schema) + 
else: + with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - generate_and_check(test_object, schema) - else: - with pytest.raises(ValidationError): - validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - except ValueError as ve: - if ve.args[0] == "allOf with conflicting types": - pytest.xfail( - reason="We should be returning a False schema from allOf if there is a conflict, but we currently raise an error" - ) - else: - raise + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + @pytest.mark.parametrize( "test_object, valid", From faa3fe7de23553e1209b9d3ced3c7b8282065a09 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:16:55 -0700 Subject: [PATCH 48/70] exception string --- guidance/library/_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 78d813fbd..bc65e8a7c 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -416,8 +416,8 @@ def check_number_bounds(minimum: Union[float, int, None], maximum: Union[float, if minimum > maximum: raise UnsatisfiableSchemaError(f"Number minimum ({minimum}) is greater than maximum ({maximum})") if minimum == maximum and (exclusiveMinimum or exclusiveMaximum): - minimum_repr = f"exclusiveMinimum {minimum}" if exclusiveMinimum else f"minimum {minimum}" - maximum_repr = f"exclusiveMaximum {maximum}" if exclusiveMaximum else f"maximum {maximum}" + minimum_repr = f"exclusiveMinimum ({minimum})" if exclusiveMinimum else f"minimum ({minimum})" + maximum_repr = f"exclusiveMaximum ({maximum})" if exclusiveMaximum else f"maximum ({maximum})" raise UnsatisfiableSchemaError(f"Number {minimum_repr} is equal to {maximum_repr}") From 01039e2b2b4611cf4b43b0adf6b60c240a7d01ae Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:31:49 -0700 Subject: [PATCH 49/70] 
UnsatisfiableSchemaError for empty oneOf, anyOf --- guidance/library/_json.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index bc65e8a7c..55f63056b 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -749,6 +749,8 @@ def anyOf( anyof_list: Sequence[JSONSchema], base_uri: str, ): + if not anyof_list: + raise UnsatisfiableSchemaError("anyOf has no schemas") options = [self.json(json_schema=item, base_uri=base_uri) for item in anyof_list] return lm + select(options) @@ -760,6 +762,8 @@ def oneOf( oneof_list: Sequence[JSONSchema], base_uri: str, ): + if not oneof_list: + raise UnsatisfiableSchemaError("oneOf has no schemas") if len(oneof_list) == 1: return lm + self.json(json_schema=oneof_list[0], base_uri=base_uri) warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") From a48c9d24ef64fcc2ae9e26bd923d6a5a0fe62a60 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:38:21 -0700 Subject: [PATCH 50/70] test unsatisfiable integer --- guidance/library/_json.py | 4 ++-- tests/unit/library/json/test_json.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 55f63056b..3f70fde9c 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -414,11 +414,11 @@ def get_sibling_keys(node: Mapping[str, Any], key: str) -> set[str]: def check_number_bounds(minimum: Union[float, int, None], maximum: Union[float, int, None], exclusiveMinimum: bool, exclusiveMaximum: bool): if minimum is not None and maximum is not None: if minimum > maximum: - raise UnsatisfiableSchemaError(f"Number minimum ({minimum}) is greater than maximum ({maximum})") + raise UnsatisfiableSchemaError(f"minimum ({minimum}) is greater than maximum ({maximum})") if minimum == maximum and (exclusiveMinimum or exclusiveMaximum): minimum_repr = 
f"exclusiveMinimum ({minimum})" if exclusiveMinimum else f"minimum ({minimum})" maximum_repr = f"exclusiveMaximum ({maximum})" if exclusiveMaximum else f"maximum ({maximum})" - raise UnsatisfiableSchemaError(f"Number {minimum_repr} is equal to {maximum_repr}") + raise UnsatisfiableSchemaError(f"{minimum_repr} is equal to {maximum_repr}") class UnsatisfiableSchemaError(ValueError): diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 724369a3e..1984b9478 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -2,6 +2,7 @@ from json import dumps as json_dumps import pytest +import re from jsonschema import ValidationError, validate from guidance import json as gen_json @@ -76,6 +77,21 @@ def test_bad_integer(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) + @pytest.mark.parametrize( + "schema", + [ + {"type": "integer", "minimum": 5, "maximum": 4}, + {"type": "integer", "minimum": 5, "exclusiveMaximum": 5}, + {"type": "integer", "exclusiveMinimum": 5, "maximum": 5}, + ] + ) + def test_unsatisfiable_min_max(self, schema): + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert re.fullmatch( + r"(exclusiveMinimum|minimum) \(5\) is (greater than|equal to) (exclusiveMaximum|maximum) \((4|5)\)", + ve.value.args[0] + ) class TestNumber: schema = """{"type": "number" }""" From 820b8ba1ca398e6600f36ed616b5403e63bded4a Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:39:33 -0700 Subject: [PATCH 51/70] test unsatisfiable number --- tests/unit/library/json/test_json.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 1984b9478..13291b080 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -146,6 +146,21 @@ def test_bad_number(self, bad_string, good_bytes, failure_byte, 
allowed_bytes): schema_obj=schema_obj, ) + @pytest.mark.parametrize( + "schema", + [ + {"type": "integer", "minimum": 5, "maximum": 4}, + {"type": "integer", "minimum": 5, "exclusiveMaximum": 5}, + {"type": "integer", "exclusiveMinimum": 5, "maximum": 5}, + ] + ) + def test_unsatisfiable_min_max(self, schema): + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert re.fullmatch( + r"(exclusiveMinimum|minimum) \(5\) is (greater than|equal to) (exclusiveMaximum|maximum) \((4|5)\)", + ve.value.args[0] + ) class TestBoundedNumeric: @pytest.mark.parametrize( From 2769f25d8e00e9f4b600307184b6e3429097a8fe Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:39:52 -0700 Subject: [PATCH 52/70] test unsatisfiable string --- tests/unit/library/json/test_json.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 13291b080..3118ffe3d 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -571,6 +571,12 @@ def test_maxLength_bad(self, bad_string: str, good_bytes, failure_byte, allowed_ schema_obj=schema_obj, ) + def test_unsatisfiable_length(self): + schema = {"type": "string", "minLength": 10, "maxLength": 5} + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "String minLength (10) is greater than maxLength (5)" + class TestSimpleObject: # These are objects without cross references From 95e8134baf7f2f8580675358df47bc0fe9941e45 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:41:53 -0700 Subject: [PATCH 53/70] test unsatisfiable array --- tests/unit/library/json/test_json.py | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 3118ffe3d..1b7a011b2 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py 
@@ -836,6 +836,57 @@ def test_bad_object(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) + def test_unsatisfiable_prefixItem_ok(self): + schema = { + "type": "array", + "prefixItems": [{"type": "integer"}, False] + } + generate_and_check([42], schema) + check_match_failure( + bad_string="[42, 43]", + good_bytes=b"[42", + failure_byte=b",", + allowed_bytes={b"]"} | INTEGER_FOLLOWING, + schema_obj=schema + ) + + def test_unsatisfiable_prefixItem_raises(self): + schema = { + "type": "array", + "prefixItems": [{"type": "integer"}, False], + "minItems": 2, + } + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "prefixItems[1] is unsatisfiable but min_items is 2" + assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + + def test_unsatisfiable_items_ok(self): + schema = { + "type": "array", + "prefixItems": [{"type": "integer"}], + "items": {"allOf": [{"type": "integer"}, False]} + } + generate_and_check([42], schema) + check_match_failure( + bad_string="[42, 43]", + good_bytes=b"[42", + failure_byte=b",", + allowed_bytes={b"]"} | INTEGER_FOLLOWING, + schema_obj=schema + ) + + def test_unsatisfiable_items_raises(self): + schema = { + "type": "array", + "prefixItems": [{"type": "integer"}], + "items": {"allOf": [{"type": "integer"}, False]}, + "minItems": 2, + } + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "prefixItems has too few elements (1) to satisfy minItems (2) but item schema is unsatisfiable" + assert ve.value.__cause__.args[0] == "allOf contains a 'false' schema" class TestArrayWithLengthConstraints: prefix_schema_obj = [{"type": "integer"}, {"type": "boolean"}] @@ -1120,6 +1171,12 @@ def test_bad_with_items( schema_obj=schema_obj, ) + def test_unsatisfiable_length(self): + schema = {"type": "array", "minItems": 10, "maxItems": 5} + with pytest.raises(ValueError) as ve: + _ = 
gen_json(schema=schema) + assert ve.value.args[0] == "minItems (10) is greater than maxItems (5)" + class TestAnyOf: @pytest.mark.parametrize("target_obj", [123, True]) From e21924df4d0ba9580a655c7740612f847a0595f5 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 16:49:05 -0700 Subject: [PATCH 54/70] test unsatisfiable object --- guidance/library/_json.py | 2 +- tests/unit/library/json/test_json.py | 38 ++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 3f70fde9c..3ad093c6f 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -597,7 +597,7 @@ def object( required_items: list[bool] = [] item_grammars: list[GrammarFunction] = [] # First iterate over the properties in order, then iterate over any missing required keys, using additional_properties as the schema - for name in (*properties, *(r for r in required if r not in properties)): + for name in (*property_grammars.keys(), *(r for r in required if r not in properties)): # Use json_dumps to properly quote / escape the key key = json_dumps(name) keys.append(key) diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 1b7a011b2..d5d25c79b 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -708,6 +708,44 @@ def test_bad_object(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) + def test_unsatisfiable_properties_ok(self): + schema = { + "type": "object", + "properties": {"a": {"type": "integer"}, "b": False}, + "additionalProperties": False, + } + generate_and_check({"a": 42}, schema) + check_match_failure( + bad_string=json_dumps({"a": 42, "b": 43}), + good_bytes=b'{"a": 42', + failure_byte=b",", + allowed_bytes={b"}"} | INTEGER_FOLLOWING, + schema_obj=schema, + ) + + def test_unsatisfiable_properties_raises(self): + schema = { + "type": "object", + "properties": 
{"a": {"type": "integer"}, "b": False}, + "required": ["b"], + "additionalProperties": False, + } + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "Required property 'b' is unsatisfiable" + assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + + def test_unsatisfiable_additional_properties_raises(self): + schema = { + "type": "object", + "properties": {"a": {"type": "integer"}}, + "required": ["a", "b"], + "additionalProperties": False, + } + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == "Required properties not in properties but additionalProperties is unsatisfiable. Missing required properties: ['b']" + assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" class TestObjectWithMissingRequired: def test_required_is_required(self): From add4f518d874cd103718cb7a714ef67332125218 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 17:06:35 -0700 Subject: [PATCH 55/70] raise UnsatisfiableSchemaError if all anyOf subschemas are unsatisfiable --- guidance/library/_json.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 3ad093c6f..d4be005aa 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -751,7 +751,16 @@ def anyOf( ): if not anyof_list: raise UnsatisfiableSchemaError("anyOf has no schemas") - options = [self.json(json_schema=item, base_uri=base_uri) for item in anyof_list] + + options: list[GrammarFunction] = [] + for item in anyof_list: + try: + options.append(self.json(json_schema=item, base_uri=base_uri)) + except UnsatisfiableSchemaError: + pass + if not options: + # Can't really point to any one schema that's unsatisfiable, so let's include all the schemas in the error message + raise UnsatisfiableSchemaError("all anyOf schemas are unsatisfiable: " + 
json_dumps(anyof_list)) return lm + select(options) @guidance(stateless=True) From 6d35e6b12d8d9456b59410ac8074fdda96fe7b42 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 17:07:26 -0700 Subject: [PATCH 56/70] punt multi-type schemas to anyOf so it can handle unsatisfiable subschemas --- guidance/library/_json.py | 42 +++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index d4be005aa..ac1d24470 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1223,20 +1223,12 @@ def json( raise NotImplementedError(f"enum with sibling keys is not yet supported. Got {sibling_keys}") return lm + self.enum(options=json_schema[Keyword.ENUM], instance_type=json_schema.get(Keyword.TYPE, None)) - if Keyword.TYPE in json_schema: - target_types = cast(Union[str, Sequence[str]], json_schema[Keyword.TYPE]) - if isinstance(target_types, str): - target_types = [target_types] - else: - target_types = list(JSONType) - - options: list[Union[str, GrammarFunction]] = [] - option: Union[str, GrammarFunction] - for target_type in target_types: + if Keyword.TYPE in json_schema and isinstance(json_schema[Keyword.TYPE], str): + target_type = json_schema[Keyword.TYPE] if target_type == JSONType.NULL: - option = "null" + return lm + "null" elif target_type == JSONType.BOOLEAN: - option = select(["true", "false"]) + return lm + select(["true", "false"]) elif target_type in {JSONType.INTEGER, JSONType.NUMBER}: minimum = cast(Union[int, float, None], json_schema.get(NumberKeywords.MINIMUM, None)) maximum = cast(Union[int, float, None], json_schema.get(NumberKeywords.MAXIMUM, None)) @@ -1258,28 +1250,28 @@ def json( exclusive_maximum_flag = True if target_type == JSONType.INTEGER: - option = self.integer( + return lm + self.integer( minimum=minimum, maximum=maximum, exclusiveMinimum=exclusive_minimum_flag, exclusiveMaximum=exclusive_maximum_flag, ) else: - option 
= self.number( + return lm + self.number( minimum=minimum, maximum=maximum, exclusiveMinimum=exclusive_minimum_flag, exclusiveMaximum=exclusive_maximum_flag, ) elif target_type == JSONType.STRING: - option = self.string( + return lm + self.string( regex=json_schema.get(StringKeywords.PATTERN, None), format=json_schema.get(StringKeywords.FORMAT, None), min_length=json_schema.get(StringKeywords.MIN_LENGTH, 0), max_length=json_schema.get(StringKeywords.MAX_LENGTH, None), ) elif target_type == JSONType.ARRAY: - option = self.array( + return lm + self.array( prefix_items_schema=json_schema.get(ArrayKeywords.PREFIX_ITEMS, []), item_schema=json_schema.get(ArrayKeywords.ITEMS, True), min_items=json_schema.get(ArrayKeywords.MIN_ITEMS, 0), @@ -1287,7 +1279,7 @@ def json( base_uri=base_uri, ) elif target_type == JSONType.OBJECT: - option = self.object( + return lm + self.object( properties=json_schema.get(ObjectKeywords.PROPERTIES, {}), additional_properties=json_schema.get(ObjectKeywords.ADDITIONAL_PROPERTIES, True), required=json_schema.get(ObjectKeywords.REQUIRED, set()), @@ -1295,9 +1287,21 @@ def json( ) else: raise ValueError(f"Unsupported type in schema: {target_type}") - options.append(option) - return lm + select(options) + if Keyword.TYPE in json_schema: + json_schema = json_schema.copy() + target_types = cast(Sequence[JSONType], json_schema.pop(Keyword.TYPE)) + else: + target_types = list(JSONType) + + assert Keyword.TYPE not in json_schema + # Punt to anyOf if we have multiple types so that it can ignore an unsatisfiable subset + return lm + self.anyOf( + anyof_list = [ + {"type": target_type, **json_schema} for target_type in target_types + ], + base_uri=base_uri, + ) @guidance(stateless=True) From b236a2c6acffce8a5ab64669ea3175b274772042 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 1 Nov 2024 17:07:44 -0700 Subject: [PATCH 57/70] test unsatisfiable anyOf --- tests/unit/library/json/test_json.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff 
--git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index d5d25c79b..3a3b553d9 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1299,6 +1299,19 @@ def test_anyOf_objects(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + def test_anyOf_unsatisfiable_ok(self): + schema = { + "anyOf": [{"type": "integer"}, False] + } + generate_and_check(3, schema) + + def test_anyOf_unsatisfiable_raises(self): + schema = { + "anyOf": [{"type": "integer", "minimum": 10, "maximum": 0}, False], + } + with pytest.raises(ValueError) as ve: + _ = gen_json(schema=schema) + assert ve.value.args[0] == 'all anyOf schemas are unsatisfiable: [{"type": "integer", "minimum": 10, "maximum": 0}, false]' class TestAllOf: @pytest.mark.parametrize( From ce84d5369e37d987deb74875cdfefecb7d31208a Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 4 Nov 2024 09:29:04 -0800 Subject: [PATCH 58/70] Revert "blacken json tests" This reverts commit 496718ed7e8a0cc62744401d174831e03ac352f1. 
Undo blacken to reduce diff size of PR --- tests/unit/library/json/test_allOf.py | 58 ++-- tests/unit/library/json/test_json.py | 248 ++++++++++-------- tests/unit/library/json/test_refs.py | 17 +- tests/unit/library/json/test_string_format.py | 137 +++------- tests/unit/library/json/utils.py | 24 +- 5 files changed, 207 insertions(+), 277 deletions(-) diff --git a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py index 4cc039d9d..670878c71 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -6,7 +6,6 @@ from jsonschema import ValidationError, validate from guidance import json as gen_json - from .utils import check_match_failure, generate_and_check @@ -139,6 +138,7 @@ def test_allOf_simple_maximum(self, test_object, valid): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + @pytest.mark.parametrize( ["test_object", "valid"], [ @@ -357,18 +357,15 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): ({"foo": 0, "bar": 5, "baz": 4}, False), # invalid: baz is not an integer or null ({"foo": 0, "bar": 5, "baz": "quxx"}, False), - ], + ] ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "properties": { - "foo": {"type": ["integer", "null"], "maximum": 4}, - "bar": {"minimum": 5, "maximum": 5}, - }, - "additionalProperties": {"type": ["integer", "null"], "minimum": 5}, + "properties": {"foo": {"type": ["integer", "null"], "maximum": 4}, "bar": {"minimum": 5, "maximum": 5}}, + "additionalProperties": {"type": ["integer", "null"], "minimum": 5} }, # additionalProperties in parent schema { @@ -376,22 +373,16 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}} ], "properties": {"bar": {"maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"]}, + "additionalProperties": 
{"type": ["integer", "null"]} }, # additionalProperties in allOf { "allOf": [ - { - "properties": {"foo": {"maximum": 4}}, - "additionalProperties": {"minimum": 5}, - }, - { - "properties": {"bar": {"maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"]}, - }, + {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}}, + {"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}} ] }, - ], + ] ) def test_additionalProperties_in_allOf(self, schema, test_object, valid): if valid: @@ -405,19 +396,19 @@ def test_additionalProperties_in_allOf(self, schema, test_object, valid): @pytest.mark.parametrize( "test_object, valid", [ - ({}, True), # empty object is valid - ({"foo": 1}, False), # foo is not a string - ({"foo": "x"}, False), # foo is not an integer - ({"foo": True}, False), # foo is not a string or an integer - ], + ({}, True), # empty object is valid + ({"foo": 1}, False), # foo is not a string + ({"foo": "x"}, False), # foo is not an integer + ({"foo": True}, False), # foo is not a string or an integer + ] ) def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): schema = { "type": "object", "allOf": [ {"additionalProperties": {"type": "integer"}}, - {"additionalProperties": {"type": "string"}}, - ], + {"additionalProperties": {"type": "string"}} + ] } if valid: validate(instance=test_object, schema=schema) @@ -449,18 +440,15 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): ([0, 5, 4], False), # invalid: baz is not an integer or null ([0, 5, "quxx"], False), - ], + ] ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "prefixItems": [ - {"type": ["integer", "null"], "maximum": 4}, - {"minimum": 5, "maximum": 5}, - ], - "items": {"type": ["integer", "null"], "minimum": 5}, + "prefixItems": [{"type": ["integer", "null"], "maximum": 4}, {"minimum": 5, "maximum": 5}], + "items": {"type": ["integer", 
"null"], "minimum": 5} }, # items in parent schema { @@ -468,19 +456,17 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, ], "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], - "items": {"type": ["integer", "null"]}, + "items": {"type": ["integer", "null"]} + }, # items in allOf { "allOf": [ {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, - { - "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], - "items": {"type": ["integer", "null"]}, - }, + {"prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], "items": {"type": ["integer", "null"]}} ] }, - ], + ] ) def test_items_and_prefixitems_in_allOf(self, schema, test_object, valid): if valid: diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 3a3b553d9..b7a663a91 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1,22 +1,22 @@ import json -from json import dumps as json_dumps import pytest import re from jsonschema import ValidationError, validate +from json import dumps as json_dumps from guidance import json as gen_json from guidance import models -from guidance.library._json import IGNORED_KEYS +from guidance.library._json import IGNORED_KEYS from .utils import check_match_failure, generate_and_check + # Common sets of allowed_bytes INTEGER_LEADING = {b"-", b"0", *{bytes([i]) for i in range(ord("1"), ord("9") + 1)}} INTEGER_FOLLOWING = {bytes([i]) for i in range(ord("0"), ord("9") + 1)} A_to_Z = {bytes([i]) for i in range(ord("A"), ord("Z") + 1)} - def test_null(): schema = """{"type": "null" }""" @@ -171,15 +171,11 @@ class TestBoundedNumeric: (-5, {"type": "integer", "minimum": -5}, True), pytest.param( *(5.0, {"type": "integer", "minimum": 5}, True), - marks=pytest.mark.xfail( - reason="JSON technically allows trailing zeroes, but we currently don't" - ), + 
marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") ), pytest.param( *(-5.0, {"type": "integer", "minimum": -5}, True), - marks=pytest.mark.xfail( - reason="JSON technically allows trailing zeroes, but we currently don't" - ), + marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") ), (5.1, {"type": "integer", "minimum": 5}, False), (-5.1, {"type": "integer", "minimum": -5}, False), @@ -239,11 +235,7 @@ class TestBoundedNumeric: (5.1, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-9.9, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), (5.0, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, False), - ( - -10.0, - {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, - False, - ), + (-10.0, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, False), (9.9, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-5.1, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), # --- Edge cases --- @@ -284,10 +276,10 @@ class TestBoundedNumeric: (0.2999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, True), (-0.2999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, True), (0.0999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), - (-0.0999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), + (-0.0999, {"type": "number", "minimum": -.3, "maximum": -0.1}, False), (0.3001, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), (-0.3001, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), - ], + ] ) def test_numeric_validation(self, instance, schema, should_pass): # Sanity check @@ -297,7 +289,10 @@ def test_numeric_validation(self, instance, schema, should_pass): else: with pytest.raises(ValidationError): validate(instance, schema=schema) - 
check_match_failure(bad_string=json_dumps(instance), schema_obj=schema) + check_match_failure( + bad_string=json_dumps(instance), + schema_obj=schema + ) class TestString: @@ -378,7 +373,9 @@ def test_regex_bad(self, bad_string: str, good_bytes, failure_byte, allowed_byte schema_obj=schema_obj, ) - @pytest.mark.parametrize("string", ["aA\u001f", '"""']) + @pytest.mark.parametrize( + "string", ["aA\u001f", '"""'] + ) def test_regex_properly_escaped_good(self, string): schema_obj = {"type": "string", "pattern": r".{3}"} # First sanity check what we're setting up @@ -391,15 +388,13 @@ def test_regex_properly_escaped_good(self, string): [ ( '"\\u001f\\u001f\u001f', - b'"\\u001f\\u001f', # able to match the first two stringified bytes - "\u001f".encode(), # fails on a literal \x1f byte - None, # hard to write a set of allowed bytes here + b'"\\u001f\\u001f', # able to match the first two stringified bytes + '\u001f'.encode(), # fails on a literal \x1f byte + None # hard to write a set of allowed bytes here ), ], ) - def test_regex_properly_escaped_bad( - self, bad_string: str, good_bytes, failure_byte, allowed_bytes - ): + def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_byte, allowed_bytes): # Note that the strings being fed in include the double quotes required # to make them JSON strings schema_obj = {"type": "string", "pattern": r".{3}"} @@ -411,6 +406,7 @@ def test_regex_properly_escaped_bad( schema_obj=schema_obj, ) + @pytest.mark.parametrize( "my_string", ["a", "bb", "ccc", "150", ",?", ".\t\n", "(){", "aA7", "\\9O"] ) @@ -753,31 +749,25 @@ def test_required_is_required(self): generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": "xyz"}, schema) check_match_failure( - bad_string=json_dumps({"a": 1}), + bad_string=json_dumps( + {"a": 1} + ), schema_obj=schema, ) def test_validated_against_additionalProperties(self): - schema = { - "type": "object", - "properties": {"a": {"type": "integer"}}, - "required": ["b"], - 
"additionalProperties": {"type": "integer"}, - } + schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b"], "additionalProperties": {"type": "integer"}} generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": 42}, schema) check_match_failure( - bad_string=json_dumps({"a": 1, "b": "string"}), + bad_string=json_dumps( + {"a": 1, "b": "string"} + ), schema_obj=schema, ) def test_false_additionalProperties_fails(self): - schema = { - "type": "object", - "properties": {"a": {"type": "integer"}}, - "required": ["b", "c"], - "additionalProperties": False, - } + schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b", "c"], "additionalProperties": False} with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ( @@ -850,6 +840,7 @@ def test_object_list(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( ["bad_string", "good_bytes", "failure_byte", "allowed_bytes"], [ @@ -1010,6 +1001,7 @@ def test_good_with_items(self, min_items, max_items, target_obj): } generate_and_check(target_obj, schema_obj) + @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1090,6 +1082,7 @@ def test_bad_with_prefix_and_items( schema_obj=schema_obj, ) + @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1154,6 +1147,7 @@ def test_bad_with_prefix( schema_obj=schema_obj, ) + @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1370,12 +1364,13 @@ def test_allOf_ref(self): generate_and_check(target_obj, schema_obj) def test_allOf_bad_schema(self): - schema = {"allOf": [{"type": "integer"}, {"type": "string"}]} + schema = { + "allOf" : [{ "type": "integer" }, { "type": "string" }] + } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) 
assert ve.value.args[0] == "allOf has conflicting types: [{'integer'}, {'string'}]" - class TestOneOf: @pytest.mark.parametrize("target_obj", [123, 42]) def test_oneOf_simple(self, target_obj): @@ -1390,6 +1385,7 @@ def test_oneOf_simple(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) + @pytest.mark.parametrize("target_obj", [123, True]) def test_oneOf_compound(self, target_obj): schema = """{ @@ -1427,6 +1423,7 @@ def test_enum(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1446,6 +1443,7 @@ def test_bad_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1473,10 +1471,13 @@ def test_bad_prefix_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes) ("2", False), ("1", False), (True, False), - ], + ] ) def test_typed_enum_single_type(self, obj, valid): - schema_obj = {"enum": [1, "2", True], "type": "integer"} + schema_obj = { + "enum": [1, "2", True], + "type": "integer" + } if valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1493,10 +1494,13 @@ def test_typed_enum_single_type(self, obj, valid): ("2", True), ("1", False), (True, False), - ], + ] ) def test_typed_enum_multiple_types(self, obj, valid): - schema_obj = {"enum": [1, "2", True], "type": ["integer", "string"]} + schema_obj = { + "enum": [1, "2", True], + "type": ["integer", "string"] + } if valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1506,12 +1510,14 @@ def test_typed_enum_multiple_types(self, obj, valid): check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj) def test_invalid_typed_enum(self): - schema_obj = {"enum": [1, "2"], "type": "boolean"} + schema_obj = { + "enum": [1, "2"], + 
"type": "boolean" + } with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "No valid options found for enum with type 'boolean': [1, '2']" - class TestConst: def test_constant_int(self): # First sanity check what we're setting up @@ -1571,29 +1577,45 @@ def test_constant_precedence(self): ) def test_valid_typed_const(self): - schema_obj = {"const": 1, "type": "integer"} + schema_obj = { + "const": 1, + "type": "integer" + } target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_typed_const(self): - schema_obj = {"const": 1, "type": "boolean"} + schema_obj = { + "const": 1, + "type": "boolean" + } with pytest.raises(ValidationError): gen_json(schema=schema_obj) def test_valid_enum_const(self): - schema_obj = {"const": 1, "enum": [1, 2, 3]} + schema_obj = { + "const": 1, + "enum": [1, 2, 3] + } target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_enum_const(self): - schema_obj = {"const": 1, "enum": [2, 3]} + schema_obj = { + "const": 1, + "enum": [2, 3] + } with pytest.raises(ValidationError): gen_json(schema=schema_obj) def test_valid_typed_enum_const(self): - schema_obj = {"const": 1, "enum": [1, "2", 3], "type": "integer"} + schema_obj = { + "const": 1, + "enum": [1, "2", 3], + "type": "integer" + } target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) @@ -1601,13 +1623,17 @@ def test_valid_typed_enum_const(self): @pytest.mark.parametrize( "const", [ - "2", # right enum, wrong type - 2, # wrong enum, right type - "3", # wrong enum, wrong type - ], + "2", # right enum, wrong type + 2, # wrong enum, right type + "3", # wrong enum, wrong type + ] ) def test_invalid_typed_enum_const(self, const): - schema_obj = {"const": const, "enum": [1, "2", 3], "type": "integer"} + schema_obj = { + "const": const, + "enum": [1, "2", 3], + "type": 
"integer" + } with pytest.raises(ValidationError): gen_json(schema=schema_obj) @@ -1655,15 +1681,11 @@ def test_simple_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ - ( - {"a": "1"}, - b'{"a": ', - b'"', - INTEGER_LEADING, - ), + ({"a": "1"}, b'{"a": ', b'"', INTEGER_LEADING, ), ( {"a": 1, "b": 1.5}, b'{"a": 1, "b": 1', @@ -1683,7 +1705,9 @@ def test_simple_bad_type(self, bad_obj, good_bytes, failure_byte, allowed_bytes) schema_obj=schema_obj, ) - @pytest.mark.parametrize("target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}]) + @pytest.mark.parametrize( + "target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}] + ) def test_anyOf_additional_properties(self, target_obj): # First sanity check what we're setting up schema_obj = json.loads(self.anyOf_schema) @@ -1692,6 +1716,7 @@ def test_anyOf_additional_properties(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1733,6 +1758,7 @@ def test_properties_and_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1741,7 +1767,9 @@ def test_properties_and_additional_properties(self, target_obj, temperature): ({"a": 1, "b": 2}, b'{"', b"a", {b"m"}), ], ) - def test_combined_missing_properties(self, bad_obj, good_bytes, failure_byte, allowed_bytes): + def test_combined_missing_properties( + self, bad_obj, good_bytes, failure_byte, allowed_bytes + ): schema_obj = json.loads(self.combined_schema) bad_string = json_dumps(bad_obj) check_match_failure( @@ -1752,6 +1780,7 @@ def test_combined_missing_properties(self, bad_obj, good_bytes, 
failure_byte, al schema_obj=schema_obj, ) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1880,6 +1909,7 @@ def test_empty_schema(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( "bad_string, good_bytes, failure_byte, allowed_bytes", [ @@ -1908,7 +1938,9 @@ def test_empty_schema(self, target_obj, temperature): ), ], ) - def test_bad_empty_schema(self, bad_string, good_bytes, failure_byte, allowed_bytes): + def test_bad_empty_schema( + self, bad_string, good_bytes, failure_byte, allowed_bytes + ): schema_obj = json.loads(self.empty_schema) check_match_failure( bad_string=bad_string, @@ -1924,12 +1956,7 @@ def test_bad_empty_schema(self, bad_string, good_bytes, failure_byte, allowed_by # Empty property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - { - "type": "object", - "properties": {"a": {"$ref": "#/$defs/A"}}, - "$defs": {"A": {}}, - "required": ["a"], - }, + {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, ], ) @pytest.mark.parametrize( @@ -1960,14 +1987,10 @@ def test_nested_empty_schema(self, schema_obj, target_obj, temperature): # Empty property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - { - "type": "object", - "properties": {"a": {"$ref": "#/$defs/A"}}, - "$defs": {"A": {}}, - "required": ["a"], - }, + {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, ], ) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -2010,6 +2033,7 @@ def test_nested_empty_schema_with_props(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -2044,6 +2068,7 @@ def 
test_items(self, schema_obj): [1, 0.4, "hello", False, None, {"a": 42}, [1, 2, 3, "four"]], schema_obj ) + def test_no_items(self): schema_obj = {"type": "array", "items": False} check_match_failure( @@ -2076,6 +2101,7 @@ def test_additionalProperties(self, schema_obj): schema_obj, ) + def test_no_additionalProperties(self): schema_obj = {"type": "object", "additionalProperties": False} check_match_failure( @@ -2086,17 +2112,17 @@ def test_no_additionalProperties(self): schema_obj=schema_obj, ) - def test_ignored_keys_allowed_as_properties(): schema_obj = { "type": "object", - "properties": {key: {"type": "string"} for key in IGNORED_KEYS}, + "properties": { + key: {"type": "string"} for key in IGNORED_KEYS + }, "required": list(IGNORED_KEYS), } target_obj = {key: "value" for key in IGNORED_KEYS} generate_and_check(target_obj, schema_obj) - class TestRequiredProperties: schema_obj = { "type": "object", @@ -2105,19 +2131,10 @@ class TestRequiredProperties: "b": {"type": "number"}, "c": {"type": "boolean"}, }, - "additionalProperties": True, + "additionalProperties": True } ALL_REQUIRED = ["a", "b", "c"] - SOME_REQUIRED_SUBSETS = [ - [], - ["a"], - ["b"], - ["c"], - ["a", "b"], - ["a", "c"], - ["b", "c"], - ["a", "b", "c"], - ] + SOME_REQUIRED_SUBSETS = [[], ["a"], ["b"], ["c"], ["a", "b"], ["a", "c"], ["b", "c"], ["a", "b", "c"]] NONE_REQUIRED: list[str] = [] @pytest.mark.parametrize( @@ -2126,7 +2143,7 @@ class TestRequiredProperties: {}, {"d": "hello"}, {"d": 42, "e": True}, - ], + ] ) def test_all_required_good(self, extra_items): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2146,7 +2163,7 @@ def test_all_required_good(self, extra_items): ({"c": True}), # Missing all ({}), - ], + ] ) def test_all_required_bad(self, bad_obj): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2161,7 +2178,7 @@ def test_all_required_bad(self, bad_obj): {}, {"d": "hello"}, {"d": 42, "e": True}, - ], + ] ) @pytest.mark.parametrize( "required", 
@@ -2199,7 +2216,7 @@ def test_some_required_bad(self, required): {}, {"d": "hello"}, {"d": 42, "e": True}, - ], + ] ) @pytest.mark.parametrize( "target_obj", @@ -2212,48 +2229,55 @@ def test_some_required_bad(self, required): {"a": "hello", "c": True}, {"b": 42, "c": True}, {"a": "hello", "b": 42, "c": True}, - ], + ] ) def test_none_required(self, target_obj, extra_items): schema_obj = {**self.schema_obj, "required": self.NONE_REQUIRED} generate_and_check({**target_obj, **extra_items}, schema_obj) - class TestRequiredPropertiesScaling: - @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) + @pytest.mark.parametrize( + "num_properties", + [1, 2, 3, 4, 5, 10, 20, 50, 100] + ) def test_many_optional_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, - "required": [], # Empty should be worst-case scenario + "properties": { + f"prop_{i}": {"type": "string"} for i in range(num_properties) + }, + "required": [] # Empty should be worst-case scenario } from guidance.library._json import GenJson - genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() cache_info = genjson._join.__wrapped__.cache_info() # Theoretical number of cache misses under the current implementation - expected_misses = 2 * num_properties - 1 - MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? + expected_misses = 2*num_properties - 1 + MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? assert 0 < cache_info.misses <= expected_misses + MISSES_MAGIC_NUMBER # NOTE: that if the cache maxsize is hit, the number of misses will be more than expected # Theoretical number of total calls under the current implementation - expected_calls = num_properties * (num_properties - 1) // 2 - CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? 
+ expected_calls = num_properties*(num_properties - 1) // 2 + CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? assert 0 < cache_info.hits + cache_info.misses <= expected_calls + CALLS_MAGIC_NUMBER - @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) + @pytest.mark.parametrize( + "num_properties", + [1, 2, 3, 4, 5, 10, 20, 50, 100] + ) def test_all_required_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, - "required": [f"prop_{i}" for i in range(num_properties)], + "properties": { + f"prop_{i}": {"type": "string"} for i in range(num_properties) + }, + "required": [f"prop_{i}" for i in range(num_properties)] } from guidance.library._json import GenJson - genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() @@ -2281,7 +2305,7 @@ class TestBooleanSchema: {"a": [1, 2, 3]}, {"a": {"b": 1}}, False, - True, + True ], ) def test_true_schema(self, target_obj): @@ -2319,7 +2343,7 @@ class TestWhitespace: ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ], + ] ) @pytest.mark.parametrize( "separators", @@ -2345,7 +2369,7 @@ def test_separators(self, separators, schema, obj): ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ], + ] ) @pytest.mark.parametrize( "separators", diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py index f2248129d..fd1136058 100644 --- a/tests/unit/library/json/test_refs.py +++ b/tests/unit/library/json/test_refs.py @@ -1,10 +1,9 @@ -from json import dumps as json_dumps - import pytest from 
jsonschema import ValidationError, validate -from .utils import check_match_failure, generate_and_check +from json import dumps as json_dumps +from .utils import check_match_failure, generate_and_check class TestRefs: @pytest.mark.parametrize( @@ -439,15 +438,9 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( # invalid on inner field ({"bar": "a", "foo": {"bar": 1}}, False), # invalid on outer field - ({"bar": 1, "foo": {"bar": "a"}}, False), + ({ "bar": 1, "foo": {"bar": "a"}}, False), # valid on both fields - ( - { - "bar": "a", - "foo": {"bar": "a"}, - }, - True, - ), + ({"bar": "a", "foo": {"bar": "a"}, }, True), ], ) def test_refs_with_relative_uris_and_defs(self, test_object, valid): @@ -981,4 +974,4 @@ def test_empty_tokens_in_ref_json_pointer(self, test_object, valid): else: with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) \ No newline at end of file diff --git a/tests/unit/library/json/test_string_format.py b/tests/unit/library/json/test_string_format.py index 7b2dd9bdc..09712fb45 100644 --- a/tests/unit/library/json/test_string_format.py +++ b/tests/unit/library/json/test_string_format.py @@ -1,8 +1,7 @@ """Adapted from https://github.com/json-schema-org/JSON-Schema-Test-Suite/tree/9fc880bfb6d8ccd093bc82431f17d13681ffae8e/tests/draft2020-12/optional/format""" -import json - import pytest +import json from .utils import check_match_failure, generate_and_check @@ -45,35 +44,17 @@ def test_good(self, target_str): "bad_str", [ '"2020-01-32"', # a invalid date string with 32 days in January - pytest.param( - '"2021-02-29"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # a invalid date string with 29 days in February (normal) - pytest.param( - '"2020-02-30"', - marks=pytest.mark.xfail(reason="number of days not yet 
tied to month"), - ), # a invalid date string with 30 days in February (leap) + pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 29 days in February (normal) + pytest.param('"2020-02-30"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 30 days in February (leap) '"2020-03-32"', # a invalid date string with 32 days in March - pytest.param( - '"2020-04-31"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # a invalid date string with 31 days in April + pytest.param('"2020-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in April '"2020-05-32"', # a invalid date string with 32 days in May - pytest.param( - '"2020-06-31"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # a invalid date string with 31 days in June + pytest.param('"2020-06-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in June '"2020-07-32"', # a invalid date string with 32 days in July '"2020-08-32"', # a invalid date string with 32 days in August - pytest.param( - '"2020-09-31"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # a invalid date string with 31 days in September + pytest.param('"2020-09-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in September '"2020-10-32"', # a invalid date string with 32 days in October - pytest.param( - '"2020-11-31"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # a invalid date string with 31 days in November + pytest.param('"2020-11-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in November '"2020-12-32"', # a invalid date string 
with 32 days in December '"2020-13-01"', # a invalid date string with invalid month '"06/19/1963"', # an invalid date string @@ -81,13 +62,8 @@ def test_good(self, target_str): '"1998-1-20"', # non-padded month dates are not valid '"1998-01-1"', # non-padded day dates are not valid '"1998-13-01"', # invalid month - pytest.param( - '"1998-04-31"', - marks=pytest.mark.xfail(reason="number of days not yet tied to month"), - ), # invalid month-day combination - pytest.param( - '"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard") - ), # 2021 is not a leap year + pytest.param('"1998-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # invalid month-day combination + pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard")), # 2021 is not a leap year '"1963-06-1\\u09ea"', # invalid non-ASCII '৪' (a Bengali 4) '"20230328"', # ISO8601 / non-RFC3339: YYYYMMDD without dashes (2023-03-28) '"2023-W01"', # ISO8601 / non-RFC3339: week number implicit day of week (2023-01-02) @@ -161,7 +137,6 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) - @pytest.mark.xfail(reason="idn-hostname format not implemented") class TestIdnHostname: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"idn-hostname"}' @@ -325,7 +300,6 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) - @pytest.mark.xfail(reason="iri-reference format is not yet implemented") class TestIriReference: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"iri-reference"}' @@ -515,40 +489,20 @@ def test_good(self, target_str): '"008:030:006Z"', # invalid time string with extra leading zeros '"8:3:6Z"', # invalid time string with no leading zero for single digit '"8:0030:6Z"', # hour, minute, second must be two digits - pytest.param( - 
'"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, Zulu (wrong hour) - pytest.param( - '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, Zulu (wrong minute) - pytest.param( - '"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, zero time-offset (wrong hour) - pytest.param( - '"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, zero time-offset (wrong minute) - pytest.param( - '"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, positive time-offset (wrong hour) - pytest.param( - '"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, positive time-offset (wrong minute) - pytest.param( - '"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, negative time-offset (wrong hour) - pytest.param( - '"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # invalid leap second, negative time-offset (wrong minute) + pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong hour) + pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong minute) + pytest.param('"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong hour) + pytest.param('"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong minute) + pytest.param('"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong hour) + pytest.param('"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap 
second, positive time-offset (wrong minute) + pytest.param('"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong hour) + pytest.param('"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong minute) '"08:30:06-8:000"', # hour, minute in time-offset must be two digits '"24:00:00Z"', # an invalid time string with invalid hour '"00:60:00Z"', # an invalid time string with invalid minute '"00:00:61Z"', # an invalid time string with invalid second - pytest.param( - '"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # an invalid time string with invalid leap second (wrong hour) - pytest.param( - '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # an invalid time string with invalid leap second (wrong minute) + pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong hour) + pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong minute) '"01:02:03+24:00"', # an invalid time string with invalid time numoffset hour '"01:02:03+00:60"', # an invalid time string with invalid time numoffset minute '"01:02:03Z+00:30"', # an invalid time string with invalid time with both Z and numoffset @@ -584,23 +538,11 @@ class TestIpv6: '"::42:ff:1"', # leading colons is valid '"d6::"', # trailing colons is valid '"1:d6::42"', # single set of double colons in the middle is valid - pytest.param( - '"1::d6:192.168.0.1"', - marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), - ), # mixed format with the ipv4 section as decimal octets - pytest.param( - '"1:2::192.168.0.1"', - marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), - ), # mixed format with double colons between the sections - 
pytest.param( - '"::ffff:192.168.0.1"', - marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), - ), # mixed format with leading double colons (ipv4-mapped ipv6 address) + pytest.param('"1::d6:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with the ipv4 section as decimal octets + pytest.param('"1:2::192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with double colons between the sections + pytest.param('"::ffff:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with leading double colons (ipv4-mapped ipv6 address) '"1:2:3:4:5:6:7:8"', # 8 octets - pytest.param( - '"1000:1000:1000:1000:1000:1000:255.255.255.255"', - marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), - ), # a long valid ipv6 + pytest.param('"1000:1000:1000:1000:1000:1000:255.255.255.255"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # a long valid ipv6 ], ) def test_good(self, target_str): @@ -768,22 +710,11 @@ class TestEmail: '"te~st@example.com"', # tilde in local part is valid '"~test@example.com"', # tilde before local part is valid '"test~@example.com"', # tilde after local part is valid - pytest.param( - '"\\"joe bloggs\\"@example.com"', - marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), - ), # a quoted string with a space in the local part is valid - pytest.param( - '"\\"joe..bloggs\\"@example.com"', - marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), - ), # a quoted string with a double dot in the local part is valid - pytest.param( - '"\\"joe@bloggs\\"@example.com"', - marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), - ), # a quoted string with a @ in the local part is valid + pytest.param('"\\"joe bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet 
implemented in local part")), # a quoted string with a space in the local part is valid + pytest.param('"\\"joe..bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a double dot in the local part is valid + pytest.param('"\\"joe@bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a @ in the local part is valid '"joe.bloggs@[127.0.0.1]"', # an IPv4-address-literal after the @ is valid - pytest.param( - '"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard") - ), # an IPv6-address-literal after the @ is valid + pytest.param('"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard")), # an IPv6-address-literal after the @ is valid '"te.s.t@example.com"', # two separated dots inside local part are valid '"riedgar+guidance@example.com"', # plus sign in local part is valid ], @@ -929,16 +860,9 @@ def test_good(self, target_str): "bad_str", [ '"1998-12-31T23:59:61Z"', # an invalid date-time past leap second, UTC - pytest.param( - '"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # an invalid date-time with leap second on a wrong minute, UTC - pytest.param( - '"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") - ), # an invalid date-time with leap second on a wrong hour, UTC - pytest.param( - '"1990-02-31T15:59:59.123-08:00"', - marks=pytest.mark.xfail(reason="valid days not yet tied to month"), - ), # an invalid day in date-time string + pytest.param('"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong minute, UTC + pytest.param('"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong hour, UTC + pytest.param('"1990-02-31T15:59:59.123-08:00"', 
marks=pytest.mark.xfail(reason="valid days not yet tied to month")), # an invalid day in date-time string '"1990-12-31T15:59:59-24:00"', # an invalid offset in date-time string '"1963-06-19T08:30:06.28123+01:00Z"', # an invalid closing Z after time-zone offset '"06/19/1963 08:30:06 PST"', # an invalid date-time string @@ -953,7 +877,6 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) - @pytest.mark.xfail(reason="regex format not implemented") class TestRegex: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"regex"}' diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py index 5498d718c..d75c41d4b 100644 --- a/tests/unit/library/json/utils.py +++ b/tests/unit/library/json/utils.py @@ -1,7 +1,6 @@ import json from functools import partial -from json import dumps as json_dumps -from json import loads as json_loads +from json import loads as json_loads, dumps as json_dumps from typing import Any, Optional, Union from jsonschema import validate @@ -9,15 +8,18 @@ from guidance import json as gen_json from guidance.library._json import JSONSchema -from ....utils import check_match_failure as _check_match_failure -from ....utils import check_run_with_temperature -from ....utils import generate_and_check as _generate_and_check +from ....utils import check_match_failure as _check_match_failure, check_run_with_temperature, generate_and_check as _generate_and_check + +from jsonschema import validate + + +import json +from functools import partial +from json import dumps as json_dumps, loads as json_loads def generate_and_check( - target_obj: Any, - schema_obj: Union[str, JSONSchema], - desired_temperature: Optional[float] = None, + target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None ): if isinstance(schema_obj, str): schema_obj = json_loads(schema_obj) @@ -30,7 +32,9 @@ def generate_and_check( # 
Now test that the grammar can recognize and generate prepared_json # We partial in the grammar_callable if desired_temperature is not None: - grammar_callable = partial(gen_json, schema=schema_obj, temperature=desired_temperature) + grammar_callable = partial( + gen_json, schema=schema_obj, temperature=desired_temperature + ) else: grammar_callable = partial(gen_json, schema=schema_obj) @@ -57,4 +61,4 @@ def check_match_failure( failure_byte=failure_byte, allowed_bytes=allowed_bytes, grammar=grammar, - ) + ) \ No newline at end of file From cb4845bba3211bf3346231bd7cdfc312ca024d6e Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 4 Nov 2024 11:04:00 -0800 Subject: [PATCH 59/70] raise UnsatisfiableSchemaError in const/enum --- guidance/library/_json.py | 15 ++++++++++----- tests/unit/library/json/test_json.py | 9 ++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index ac1d24470..dc2320bce 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1022,10 +1022,13 @@ def const( schema_to_validate_against["enum"] = enum if schema_to_validate_against: # Raise a validation error if the value doesn't match the type - jsonschema.validate( - instance=value, - schema=schema_to_validate_against, - ) + try: + jsonschema.validate( + instance=value, + schema=schema_to_validate_against, + ) + except jsonschema.ValidationError as e: + raise UnsatisfiableSchemaError(f"const {value!r} does not match schema {schema_to_validate_against}") from e # Base case if isinstance(value, (type(None), bool, int, float, str)): return lm + json_dumps(value) @@ -1063,11 +1066,13 @@ def enum( options: Sequence[Union[None, bool, int, float, str, Mapping, Sequence]], instance_type: Optional[Union[str, Sequence[str]]] = None, ): + if not options: + raise UnsatisfiableSchemaError("enum has no options") all_opts: list[GrammarFunction] = [] for instance in options: try: grm = self.const(value=instance, 
instance_type=instance_type) - except jsonschema.ValidationError: + except UnsatisfiableSchemaError: continue all_opts.append(grm) if not all_opts: diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index b7a663a91..3596d81e3 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1590,8 +1590,9 @@ def test_invalid_typed_const(self): "const": 1, "type": "boolean" } - with pytest.raises(ValidationError): + with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) + assert ve.value.args[0] == "const 1 does not match schema {'type': 'boolean'}" def test_valid_enum_const(self): schema_obj = { @@ -1607,8 +1608,9 @@ def test_invalid_enum_const(self): "const": 1, "enum": [2, 3] } - with pytest.raises(ValidationError): + with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) + assert ve.value.args[0] == "const 1 does not match schema {'enum': [2, 3]}" def test_valid_typed_enum_const(self): schema_obj = { @@ -1634,8 +1636,9 @@ def test_invalid_typed_enum_const(self, const): "enum": [1, "2", 3], "type": "integer" } - with pytest.raises(ValidationError): + with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) + assert ve.value.args[0] == f"const {const!r} does not match schema {{'type': 'integer', 'enum': [1, '2', 3]}}" class TestAdditionalProperties: From 5489f17df0064a7a18b28020e6fdef0be0c8fc80 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 10:35:11 -0800 Subject: [PATCH 60/70] black and isort --- tests/unit/library/json/test_allOf.py | 59 ++-- tests/unit/library/json/test_json.py | 319 +++++++++--------- tests/unit/library/json/test_refs.py | 17 +- tests/unit/library/json/test_string_format.py | 137 ++++++-- tests/unit/library/json/utils.py | 24 +- 5 files changed, 324 insertions(+), 232 deletions(-) diff --git a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py index 670878c71..261f40345 100644 --- 
a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -6,6 +6,7 @@ from jsonschema import ValidationError, validate from guidance import json as gen_json + from .utils import check_match_failure, generate_and_check @@ -138,7 +139,6 @@ def test_allOf_simple_maximum(self, test_object, valid): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - @pytest.mark.parametrize( ["test_object", "valid"], [ @@ -357,15 +357,18 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): ({"foo": 0, "bar": 5, "baz": 4}, False), # invalid: baz is not an integer or null ({"foo": 0, "bar": 5, "baz": "quxx"}, False), - ] + ], ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "properties": {"foo": {"type": ["integer", "null"], "maximum": 4}, "bar": {"minimum": 5, "maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"], "minimum": 5} + "properties": { + "foo": {"type": ["integer", "null"], "maximum": 4}, + "bar": {"minimum": 5, "maximum": 5}, + }, + "additionalProperties": {"type": ["integer", "null"], "minimum": 5}, }, # additionalProperties in parent schema { @@ -373,16 +376,22 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}} ], "properties": {"bar": {"maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"]} + "additionalProperties": {"type": ["integer", "null"]}, }, # additionalProperties in allOf { "allOf": [ - {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}}, - {"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}} + { + "properties": {"foo": {"maximum": 4}}, + "additionalProperties": {"minimum": 5}, + }, + { + "properties": {"bar": {"maximum": 5}}, + "additionalProperties": {"type": ["integer", "null"]}, + }, ] }, - ] + ], ) def 
test_additionalProperties_in_allOf(self, schema, test_object, valid): if valid: @@ -396,19 +405,19 @@ def test_additionalProperties_in_allOf(self, schema, test_object, valid): @pytest.mark.parametrize( "test_object, valid", [ - ({}, True), # empty object is valid - ({"foo": 1}, False), # foo is not a string - ({"foo": "x"}, False), # foo is not an integer - ({"foo": True}, False), # foo is not a string or an integer - ] + ({}, True), # empty object is valid + ({"foo": 1}, False), # foo is not a string + ({"foo": "x"}, False), # foo is not an integer + ({"foo": True}, False), # foo is not a string or an integer + ], ) def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): schema = { "type": "object", "allOf": [ {"additionalProperties": {"type": "integer"}}, - {"additionalProperties": {"type": "string"}} - ] + {"additionalProperties": {"type": "string"}}, + ], } if valid: validate(instance=test_object, schema=schema) @@ -418,7 +427,6 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): validate(instance=test_object, schema=schema) check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) - @pytest.mark.parametrize( "test_object, valid", [ @@ -440,15 +448,18 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): ([0, 5, 4], False), # invalid: baz is not an integer or null ([0, 5, "quxx"], False), - ] + ], ) @pytest.mark.parametrize( "schema", [ # The following are equivalent to this: { - "prefixItems": [{"type": ["integer", "null"], "maximum": 4}, {"minimum": 5, "maximum": 5}], - "items": {"type": ["integer", "null"], "minimum": 5} + "prefixItems": [ + {"type": ["integer", "null"], "maximum": 4}, + {"minimum": 5, "maximum": 5}, + ], + "items": {"type": ["integer", "null"], "minimum": 5}, }, # items in parent schema { @@ -456,17 +467,19 @@ def test_inconsistent_additionalProperties_in_allOf(self, test_object, valid): {"prefixItems": [{"maximum": 4}], "items": {"minimum": 
5}}, ], "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], - "items": {"type": ["integer", "null"]} - + "items": {"type": ["integer", "null"]}, }, # items in allOf { "allOf": [ {"prefixItems": [{"maximum": 4}], "items": {"minimum": 5}}, - {"prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], "items": {"type": ["integer", "null"]}} + { + "prefixItems": [{"type": ["integer", "null"]}, {"maximum": 5}], + "items": {"type": ["integer", "null"]}, + }, ] }, - ] + ], ) def test_items_and_prefixitems_in_allOf(self, schema, test_object, valid): if valid: diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 3596d81e3..7c9cfd38a 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1,22 +1,22 @@ import json +import re +from json import dumps as json_dumps import pytest -import re from jsonschema import ValidationError, validate -from json import dumps as json_dumps from guidance import json as gen_json from guidance import models - from guidance.library._json import IGNORED_KEYS -from .utils import check_match_failure, generate_and_check +from .utils import check_match_failure, generate_and_check # Common sets of allowed_bytes INTEGER_LEADING = {b"-", b"0", *{bytes([i]) for i in range(ord("1"), ord("9") + 1)}} INTEGER_FOLLOWING = {bytes([i]) for i in range(ord("0"), ord("9") + 1)} A_to_Z = {bytes([i]) for i in range(ord("A"), ord("Z") + 1)} + def test_null(): schema = """{"type": "null" }""" @@ -83,16 +83,17 @@ def test_bad_integer(self, bad_string, good_bytes, failure_byte, allowed_bytes): {"type": "integer", "minimum": 5, "maximum": 4}, {"type": "integer", "minimum": 5, "exclusiveMaximum": 5}, {"type": "integer", "exclusiveMinimum": 5, "maximum": 5}, - ] + ], ) def test_unsatisfiable_min_max(self, schema): with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert re.fullmatch( r"(exclusiveMinimum|minimum) \(5\) is (greater than|equal to) 
(exclusiveMaximum|maximum) \((4|5)\)", - ve.value.args[0] + ve.value.args[0], ) + class TestNumber: schema = """{"type": "number" }""" @@ -152,16 +153,17 @@ def test_bad_number(self, bad_string, good_bytes, failure_byte, allowed_bytes): {"type": "integer", "minimum": 5, "maximum": 4}, {"type": "integer", "minimum": 5, "exclusiveMaximum": 5}, {"type": "integer", "exclusiveMinimum": 5, "maximum": 5}, - ] + ], ) def test_unsatisfiable_min_max(self, schema): with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert re.fullmatch( r"(exclusiveMinimum|minimum) \(5\) is (greater than|equal to) (exclusiveMaximum|maximum) \((4|5)\)", - ve.value.args[0] + ve.value.args[0], ) + class TestBoundedNumeric: @pytest.mark.parametrize( "instance, schema, should_pass", @@ -171,11 +173,15 @@ class TestBoundedNumeric: (-5, {"type": "integer", "minimum": -5}, True), pytest.param( *(5.0, {"type": "integer", "minimum": 5}, True), - marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") + marks=pytest.mark.xfail( + reason="JSON technically allows trailing zeroes, but we currently don't" + ), ), pytest.param( *(-5.0, {"type": "integer", "minimum": -5}, True), - marks=pytest.mark.xfail(reason="JSON technically allows trailing zeroes, but we currently don't") + marks=pytest.mark.xfail( + reason="JSON technically allows trailing zeroes, but we currently don't" + ), ), (5.1, {"type": "integer", "minimum": 5}, False), (-5.1, {"type": "integer", "minimum": -5}, False), @@ -235,7 +241,11 @@ class TestBoundedNumeric: (5.1, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-9.9, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), (5.0, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, False), - (-10.0, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, False), + ( + -10.0, + {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": 
-5.0}, + False, + ), (9.9, {"type": "number", "exclusiveMinimum": 5.0, "exclusiveMaximum": 10.0}, True), (-5.1, {"type": "number", "exclusiveMinimum": -10.0, "exclusiveMaximum": -5.0}, True), # --- Edge cases --- @@ -276,10 +286,10 @@ class TestBoundedNumeric: (0.2999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, True), (-0.2999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, True), (0.0999, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), - (-0.0999, {"type": "number", "minimum": -.3, "maximum": -0.1}, False), + (-0.0999, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), (0.3001, {"type": "number", "minimum": 0.1, "maximum": 0.3}, False), (-0.3001, {"type": "number", "minimum": -0.3, "maximum": -0.1}, False), - ] + ], ) def test_numeric_validation(self, instance, schema, should_pass): # Sanity check @@ -289,10 +299,7 @@ def test_numeric_validation(self, instance, schema, should_pass): else: with pytest.raises(ValidationError): validate(instance, schema=schema) - check_match_failure( - bad_string=json_dumps(instance), - schema_obj=schema - ) + check_match_failure(bad_string=json_dumps(instance), schema_obj=schema) class TestString: @@ -373,9 +380,7 @@ def test_regex_bad(self, bad_string: str, good_bytes, failure_byte, allowed_byte schema_obj=schema_obj, ) - @pytest.mark.parametrize( - "string", ["aA\u001f", '"""'] - ) + @pytest.mark.parametrize("string", ["aA\u001f", '"""']) def test_regex_properly_escaped_good(self, string): schema_obj = {"type": "string", "pattern": r".{3}"} # First sanity check what we're setting up @@ -388,13 +393,15 @@ def test_regex_properly_escaped_good(self, string): [ ( '"\\u001f\\u001f\u001f', - b'"\\u001f\\u001f', # able to match the first two stringified bytes - '\u001f'.encode(), # fails on a literal \x1f byte - None # hard to write a set of allowed bytes here + b'"\\u001f\\u001f', # able to match the first two stringified bytes + "\u001f".encode(), # fails on a literal \x1f byte + None, # hard to 
write a set of allowed bytes here ), ], ) - def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_byte, allowed_bytes): + def test_regex_properly_escaped_bad( + self, bad_string: str, good_bytes, failure_byte, allowed_bytes + ): # Note that the strings being fed in include the double quotes required # to make them JSON strings schema_obj = {"type": "string", "pattern": r".{3}"} @@ -406,7 +413,6 @@ def test_regex_properly_escaped_bad(self, bad_string: str, good_bytes, failure_b schema_obj=schema_obj, ) - @pytest.mark.parametrize( "my_string", ["a", "bb", "ccc", "150", ",?", ".\t\n", "(){", "aA7", "\\9O"] ) @@ -729,7 +735,9 @@ def test_unsatisfiable_properties_raises(self): with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ve.value.args[0] == "Required property 'b' is unsatisfiable" - assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + assert ( + ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + ) def test_unsatisfiable_additional_properties_raises(self): schema = { @@ -740,8 +748,14 @@ def test_unsatisfiable_additional_properties_raises(self): } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "Required properties not in properties but additionalProperties is unsatisfiable. Missing required properties: ['b']" - assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + assert ( + ve.value.args[0] + == "Required properties not in properties but additionalProperties is unsatisfiable. 
Missing required properties: ['b']" + ) + assert ( + ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + ) + class TestObjectWithMissingRequired: def test_required_is_required(self): @@ -749,32 +763,40 @@ def test_required_is_required(self): generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": "xyz"}, schema) check_match_failure( - bad_string=json_dumps( - {"a": 1} - ), + bad_string=json_dumps({"a": 1}), schema_obj=schema, ) def test_validated_against_additionalProperties(self): - schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b"], "additionalProperties": {"type": "integer"}} + schema = { + "type": "object", + "properties": {"a": {"type": "integer"}}, + "required": ["b"], + "additionalProperties": {"type": "integer"}, + } generate_and_check({"b": 1}, schema) generate_and_check({"a": 1, "b": 42}, schema) check_match_failure( - bad_string=json_dumps( - {"a": 1, "b": "string"} - ), + bad_string=json_dumps({"a": 1, "b": "string"}), schema_obj=schema, ) def test_false_additionalProperties_fails(self): - schema = {"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["b", "c"], "additionalProperties": False} + schema = { + "type": "object", + "properties": {"a": {"type": "integer"}}, + "required": ["b", "c"], + "additionalProperties": False, + } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ( ve.value.args[0] == "Required properties not in properties but additionalProperties is unsatisfiable. 
Missing required properties: ['b', 'c']" ) - assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + assert ( + ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + ) class TestSimpleArray: @@ -840,7 +862,6 @@ def test_object_list(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( ["bad_string", "good_bytes", "failure_byte", "allowed_bytes"], [ @@ -866,17 +887,14 @@ def test_bad_object(self, bad_string, good_bytes, failure_byte, allowed_bytes): ) def test_unsatisfiable_prefixItem_ok(self): - schema = { - "type": "array", - "prefixItems": [{"type": "integer"}, False] - } + schema = {"type": "array", "prefixItems": [{"type": "integer"}, False]} generate_and_check([42], schema) check_match_failure( bad_string="[42, 43]", good_bytes=b"[42", failure_byte=b",", allowed_bytes={b"]"} | INTEGER_FOLLOWING, - schema_obj=schema + schema_obj=schema, ) def test_unsatisfiable_prefixItem_raises(self): @@ -888,13 +906,15 @@ def test_unsatisfiable_prefixItem_raises(self): with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ve.value.args[0] == "prefixItems[1] is unsatisfiable but min_items is 2" - assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + assert ( + ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + ) def test_unsatisfiable_items_ok(self): schema = { "type": "array", "prefixItems": [{"type": "integer"}], - "items": {"allOf": [{"type": "integer"}, False]} + "items": {"allOf": [{"type": "integer"}, False]}, } generate_and_check([42], schema) check_match_failure( @@ -902,7 +922,7 @@ def test_unsatisfiable_items_ok(self): good_bytes=b"[42", failure_byte=b",", allowed_bytes={b"]"} | INTEGER_FOLLOWING, - schema_obj=schema + schema_obj=schema, ) def test_unsatisfiable_items_raises(self): @@ -914,9 
+934,13 @@ def test_unsatisfiable_items_raises(self): } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == "prefixItems has too few elements (1) to satisfy minItems (2) but item schema is unsatisfiable" + assert ( + ve.value.args[0] + == "prefixItems has too few elements (1) to satisfy minItems (2) but item schema is unsatisfiable" + ) assert ve.value.__cause__.args[0] == "allOf contains a 'false' schema" + class TestArrayWithLengthConstraints: prefix_schema_obj = [{"type": "integer"}, {"type": "boolean"}] items_schema_obj = {"type": "string"} @@ -1001,7 +1025,6 @@ def test_good_with_items(self, min_items, max_items, target_obj): } generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1082,7 +1105,6 @@ def test_bad_with_prefix_and_items( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1147,7 +1169,6 @@ def test_bad_with_prefix( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "min_items, max_items, bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1294,9 +1315,7 @@ def test_anyOf_objects(self, target_obj, temperature): generate_and_check(target_obj, schema_obj, desired_temperature=temperature) def test_anyOf_unsatisfiable_ok(self): - schema = { - "anyOf": [{"type": "integer"}, False] - } + schema = {"anyOf": [{"type": "integer"}, False]} generate_and_check(3, schema) def test_anyOf_unsatisfiable_raises(self): @@ -1305,7 +1324,11 @@ def test_anyOf_unsatisfiable_raises(self): } with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) - assert ve.value.args[0] == 'all anyOf schemas are unsatisfiable: [{"type": "integer", "minimum": 10, "maximum": 0}, false]' + assert ( + ve.value.args[0] + == 'all anyOf schemas are unsatisfiable: [{"type": "integer", "minimum": 10, "maximum": 0}, false]' + ) + class TestAllOf: 
@pytest.mark.parametrize( @@ -1364,13 +1387,12 @@ def test_allOf_ref(self): generate_and_check(target_obj, schema_obj) def test_allOf_bad_schema(self): - schema = { - "allOf" : [{ "type": "integer" }, { "type": "string" }] - } + schema = {"allOf": [{"type": "integer"}, {"type": "string"}]} with pytest.raises(ValueError) as ve: _ = gen_json(schema=schema) assert ve.value.args[0] == "allOf has conflicting types: [{'integer'}, {'string'}]" + class TestOneOf: @pytest.mark.parametrize("target_obj", [123, 42]) def test_oneOf_simple(self, target_obj): @@ -1385,7 +1407,6 @@ def test_oneOf_simple(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize("target_obj", [123, True]) def test_oneOf_compound(self, target_obj): schema = """{ @@ -1423,7 +1444,6 @@ def test_enum(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1443,7 +1463,6 @@ def test_bad_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes): schema_obj=schema_obj, ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1471,13 +1490,10 @@ def test_bad_prefix_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes) ("2", False), ("1", False), (True, False), - ] + ], ) def test_typed_enum_single_type(self, obj, valid): - schema_obj = { - "enum": [1, "2", True], - "type": "integer" - } + schema_obj = {"enum": [1, "2", True], "type": "integer"} if valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1494,13 +1510,10 @@ def test_typed_enum_single_type(self, obj, valid): ("2", True), ("1", False), (True, False), - ] + ], ) def test_typed_enum_multiple_types(self, obj, valid): - schema_obj = { - "enum": [1, "2", True], - "type": ["integer", "string"] - } + schema_obj = {"enum": [1, "2", True], "type": ["integer", "string"]} if 
valid: validate(instance=obj, schema=schema_obj) generate_and_check(obj, schema_obj) @@ -1510,14 +1523,12 @@ def test_typed_enum_multiple_types(self, obj, valid): check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj) def test_invalid_typed_enum(self): - schema_obj = { - "enum": [1, "2"], - "type": "boolean" - } + schema_obj = {"enum": [1, "2"], "type": "boolean"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "No valid options found for enum with type 'boolean': [1, '2']" + class TestConst: def test_constant_int(self): # First sanity check what we're setting up @@ -1577,47 +1588,31 @@ def test_constant_precedence(self): ) def test_valid_typed_const(self): - schema_obj = { - "const": 1, - "type": "integer" - } + schema_obj = {"const": 1, "type": "integer"} target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_typed_const(self): - schema_obj = { - "const": 1, - "type": "boolean" - } + schema_obj = {"const": 1, "type": "boolean"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "const 1 does not match schema {'type': 'boolean'}" def test_valid_enum_const(self): - schema_obj = { - "const": 1, - "enum": [1, 2, 3] - } + schema_obj = {"const": 1, "enum": [1, 2, 3]} target_obj = 1 validate(instance=target_obj, schema=schema_obj) generate_and_check(target_obj, schema_obj) def test_invalid_enum_const(self): - schema_obj = { - "const": 1, - "enum": [2, 3] - } + schema_obj = {"const": 1, "enum": [2, 3]} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "const 1 does not match schema {'enum': [2, 3]}" def test_valid_typed_enum_const(self): - schema_obj = { - "const": 1, - "enum": [1, "2", 3], - "type": "integer" - } + schema_obj = {"const": 1, "enum": [1, "2", 3], "type": "integer"} target_obj = 1 validate(instance=target_obj, schema=schema_obj) 
generate_and_check(target_obj, schema_obj) @@ -1625,20 +1620,19 @@ def test_valid_typed_enum_const(self): @pytest.mark.parametrize( "const", [ - "2", # right enum, wrong type - 2, # wrong enum, right type - "3", # wrong enum, wrong type - ] + "2", # right enum, wrong type + 2, # wrong enum, right type + "3", # wrong enum, wrong type + ], ) def test_invalid_typed_enum_const(self, const): - schema_obj = { - "const": const, - "enum": [1, "2", 3], - "type": "integer" - } + schema_obj = {"const": const, "enum": [1, "2", 3], "type": "integer"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) - assert ve.value.args[0] == f"const {const!r} does not match schema {{'type': 'integer', 'enum': [1, '2', 3]}}" + assert ( + ve.value.args[0] + == f"const {const!r} does not match schema {{'type': 'integer', 'enum': [1, '2', 3]}}" + ) class TestAdditionalProperties: @@ -1684,11 +1678,15 @@ def test_simple_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ - ({"a": "1"}, b'{"a": ', b'"', INTEGER_LEADING, ), + ( + {"a": "1"}, + b'{"a": ', + b'"', + INTEGER_LEADING, + ), ( {"a": 1, "b": 1.5}, b'{"a": 1, "b": 1', @@ -1708,9 +1706,7 @@ def test_simple_bad_type(self, bad_obj, good_bytes, failure_byte, allowed_bytes) schema_obj=schema_obj, ) - @pytest.mark.parametrize( - "target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}] - ) + @pytest.mark.parametrize("target_obj", [{}, {"a": 1}, {"a": "2"}, {"a": 1, "b": "2"}]) def test_anyOf_additional_properties(self, target_obj): # First sanity check what we're setting up schema_obj = json.loads(self.anyOf_schema) @@ -1719,7 +1715,6 @@ def test_anyOf_additional_properties(self, target_obj): # The actual check generate_and_check(target_obj, schema_obj) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1761,7 
+1756,6 @@ def test_properties_and_additional_properties(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1770,9 +1764,7 @@ def test_properties_and_additional_properties(self, target_obj, temperature): ({"a": 1, "b": 2}, b'{"', b"a", {b"m"}), ], ) - def test_combined_missing_properties( - self, bad_obj, good_bytes, failure_byte, allowed_bytes - ): + def test_combined_missing_properties(self, bad_obj, good_bytes, failure_byte, allowed_bytes): schema_obj = json.loads(self.combined_schema) bad_string = json_dumps(bad_obj) check_match_failure( @@ -1783,7 +1775,6 @@ def test_combined_missing_properties( schema_obj=schema_obj, ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -1912,7 +1903,6 @@ def test_empty_schema(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_string, good_bytes, failure_byte, allowed_bytes", [ @@ -1941,9 +1931,7 @@ def test_empty_schema(self, target_obj, temperature): ), ], ) - def test_bad_empty_schema( - self, bad_string, good_bytes, failure_byte, allowed_bytes - ): + def test_bad_empty_schema(self, bad_string, good_bytes, failure_byte, allowed_bytes): schema_obj = json.loads(self.empty_schema) check_match_failure( bad_string=bad_string, @@ -1959,7 +1947,12 @@ def test_bad_empty_schema( # Empty property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, + { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + "$defs": {"A": {}}, + "required": ["a"], + }, ], ) @pytest.mark.parametrize( @@ -1990,10 +1983,14 @@ def test_nested_empty_schema(self, schema_obj, target_obj, temperature): # Empty 
property {"type": "object", "properties": {"a": {}}, "required": ["a"]}, # Empty reference - {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}, "required": ["a"]}, + { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + "$defs": {"A": {}}, + "required": ["a"], + }, ], ) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -2036,7 +2033,6 @@ def test_nested_empty_schema_with_props(self, target_obj, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) - @pytest.mark.parametrize( "bad_obj, good_bytes, failure_byte, allowed_bytes", [ @@ -2071,7 +2067,6 @@ def test_items(self, schema_obj): [1, 0.4, "hello", False, None, {"a": 42}, [1, 2, 3, "four"]], schema_obj ) - def test_no_items(self): schema_obj = {"type": "array", "items": False} check_match_failure( @@ -2104,7 +2099,6 @@ def test_additionalProperties(self, schema_obj): schema_obj, ) - def test_no_additionalProperties(self): schema_obj = {"type": "object", "additionalProperties": False} check_match_failure( @@ -2115,17 +2109,17 @@ def test_no_additionalProperties(self): schema_obj=schema_obj, ) + def test_ignored_keys_allowed_as_properties(): schema_obj = { "type": "object", - "properties": { - key: {"type": "string"} for key in IGNORED_KEYS - }, + "properties": {key: {"type": "string"} for key in IGNORED_KEYS}, "required": list(IGNORED_KEYS), } target_obj = {key: "value" for key in IGNORED_KEYS} generate_and_check(target_obj, schema_obj) + class TestRequiredProperties: schema_obj = { "type": "object", @@ -2134,10 +2128,19 @@ class TestRequiredProperties: "b": {"type": "number"}, "c": {"type": "boolean"}, }, - "additionalProperties": True + "additionalProperties": True, } ALL_REQUIRED = ["a", "b", "c"] - SOME_REQUIRED_SUBSETS = [[], ["a"], ["b"], ["c"], ["a", "b"], ["a", "c"], ["b", "c"], ["a", "b", "c"]] + SOME_REQUIRED_SUBSETS = [ + [], + ["a"], + ["b"], + ["c"], + ["a", "b"], + 
["a", "c"], + ["b", "c"], + ["a", "b", "c"], + ] NONE_REQUIRED: list[str] = [] @pytest.mark.parametrize( @@ -2146,7 +2149,7 @@ class TestRequiredProperties: {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) def test_all_required_good(self, extra_items): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2166,7 +2169,7 @@ def test_all_required_good(self, extra_items): ({"c": True}), # Missing all ({}), - ] + ], ) def test_all_required_bad(self, bad_obj): schema_obj = {**self.schema_obj, "required": self.ALL_REQUIRED} @@ -2181,7 +2184,7 @@ def test_all_required_bad(self, bad_obj): {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) @pytest.mark.parametrize( "required", @@ -2219,7 +2222,7 @@ def test_some_required_bad(self, required): {}, {"d": "hello"}, {"d": 42, "e": True}, - ] + ], ) @pytest.mark.parametrize( "target_obj", @@ -2232,55 +2235,48 @@ def test_some_required_bad(self, required): {"a": "hello", "c": True}, {"b": 42, "c": True}, {"a": "hello", "b": 42, "c": True}, - ] + ], ) def test_none_required(self, target_obj, extra_items): schema_obj = {**self.schema_obj, "required": self.NONE_REQUIRED} generate_and_check({**target_obj, **extra_items}, schema_obj) + class TestRequiredPropertiesScaling: - @pytest.mark.parametrize( - "num_properties", - [1, 2, 3, 4, 5, 10, 20, 50, 100] - ) + @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) def test_many_optional_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": { - f"prop_{i}": {"type": "string"} for i in range(num_properties) - }, - "required": [] # Empty should be worst-case scenario + "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, + "required": [], # Empty should be worst-case scenario } from guidance.library._json import GenJson + genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() cache_info = genjson._join.__wrapped__.cache_info() # 
Theoretical number of cache misses under the current implementation - expected_misses = 2*num_properties - 1 - MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? + expected_misses = 2 * num_properties - 1 + MISSES_MAGIC_NUMBER = 5 # Where in the world is this coming from? assert 0 < cache_info.misses <= expected_misses + MISSES_MAGIC_NUMBER # NOTE: that if the cache maxsize is hit, the number of misses will be more than expected # Theoretical number of total calls under the current implementation - expected_calls = num_properties*(num_properties - 1) // 2 - CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? + expected_calls = num_properties * (num_properties - 1) // 2 + CALLS_MAGIC_NUMBER = 12 # Where in the world is this coming from? assert 0 < cache_info.hits + cache_info.misses <= expected_calls + CALLS_MAGIC_NUMBER - @pytest.mark.parametrize( - "num_properties", - [1, 2, 3, 4, 5, 10, 20, 50, 100] - ) + @pytest.mark.parametrize("num_properties", [1, 2, 3, 4, 5, 10, 20, 50, 100]) def test_all_required_properties_doesnt_blow_up(self, num_properties): schema_obj = { "type": "object", - "properties": { - f"prop_{i}": {"type": "string"} for i in range(num_properties) - }, - "required": [f"prop_{i}" for i in range(num_properties)] + "properties": {f"prop_{i}": {"type": "string"} for i in range(num_properties)}, + "required": [f"prop_{i}" for i in range(num_properties)], } from guidance.library._json import GenJson + genjson = GenJson(schema=schema_obj) genjson._join.__wrapped__.cache_clear() _ = genjson.root() @@ -2308,7 +2304,7 @@ class TestBooleanSchema: {"a": [1, 2, 3]}, {"a": {"b": 1}}, False, - True + True, ], ) def test_true_schema(self, target_obj): @@ -2327,7 +2323,10 @@ def test_false_required_property(self): with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) assert ve.value.args[0] == "Required property 'a' is unsatisfiable" - assert ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of 
`false`" + assert ( + ve.value.__cause__.args[0] == "No valid JSON can be generated from a schema of `false`" + ) + class TestWhitespace: seps = [ @@ -2346,7 +2345,7 @@ class TestWhitespace: ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ] + ], ) @pytest.mark.parametrize( "separators", @@ -2372,7 +2371,7 @@ def test_separators(self, separators, schema, obj): ({"enum": [{"a": 1, "b": 2, "c": [1, 2, 3]}]}, {"a": 1, "b": 2, "c": [1, 2, 3]}), # Static object: const (both item and key seps) ({"const": {"a": 1, "b": 2, "c": [1, 2, 3]}}, {"a": 1, "b": 2, "c": [1, 2, 3]}), - ] + ], ) @pytest.mark.parametrize( "separators", diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py index fd1136058..f2248129d 100644 --- a/tests/unit/library/json/test_refs.py +++ b/tests/unit/library/json/test_refs.py @@ -1,10 +1,11 @@ +from json import dumps as json_dumps + import pytest from jsonschema import ValidationError, validate -from json import dumps as json_dumps - from .utils import check_match_failure, generate_and_check + class TestRefs: @pytest.mark.parametrize( ["test_object", "valid"], @@ -438,9 +439,15 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( # invalid on inner field ({"bar": "a", "foo": {"bar": 1}}, False), # invalid on outer field - ({ "bar": 1, "foo": {"bar": "a"}}, False), + ({"bar": 1, "foo": {"bar": "a"}}, False), # valid on both fields - ({"bar": "a", "foo": {"bar": "a"}, }, True), + ( + { + "bar": "a", + "foo": {"bar": "a"}, + }, + True, + ), ], ) def test_refs_with_relative_uris_and_defs(self, test_object, valid): @@ -974,4 +981,4 @@ def test_empty_tokens_in_ref_json_pointer(self, test_object, valid): else: with pytest.raises(ValidationError): validate(instance=test_object, schema=schema) - 
check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) \ No newline at end of file + check_match_failure(bad_string=json_dumps(test_object), schema_obj=schema) diff --git a/tests/unit/library/json/test_string_format.py b/tests/unit/library/json/test_string_format.py index 09712fb45..7b2dd9bdc 100644 --- a/tests/unit/library/json/test_string_format.py +++ b/tests/unit/library/json/test_string_format.py @@ -1,8 +1,9 @@ """Adapted from https://github.com/json-schema-org/JSON-Schema-Test-Suite/tree/9fc880bfb6d8ccd093bc82431f17d13681ffae8e/tests/draft2020-12/optional/format""" -import pytest import json +import pytest + from .utils import check_match_failure, generate_and_check @@ -44,17 +45,35 @@ def test_good(self, target_str): "bad_str", [ '"2020-01-32"', # a invalid date string with 32 days in January - pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 29 days in February (normal) - pytest.param('"2020-02-30"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 30 days in February (leap) + pytest.param( + '"2021-02-29"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 29 days in February (normal) + pytest.param( + '"2020-02-30"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 30 days in February (leap) '"2020-03-32"', # a invalid date string with 32 days in March - pytest.param('"2020-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in April + pytest.param( + '"2020-04-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in April '"2020-05-32"', # a invalid date string with 32 days in May - pytest.param('"2020-06-31"', marks=pytest.mark.xfail(reason="number of 
days not yet tied to month")), # a invalid date string with 31 days in June + pytest.param( + '"2020-06-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in June '"2020-07-32"', # a invalid date string with 32 days in July '"2020-08-32"', # a invalid date string with 32 days in August - pytest.param('"2020-09-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in September + pytest.param( + '"2020-09-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in September '"2020-10-32"', # a invalid date string with 32 days in October - pytest.param('"2020-11-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # a invalid date string with 31 days in November + pytest.param( + '"2020-11-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # a invalid date string with 31 days in November '"2020-12-32"', # a invalid date string with 32 days in December '"2020-13-01"', # a invalid date string with invalid month '"06/19/1963"', # an invalid date string @@ -62,8 +81,13 @@ def test_good(self, target_str): '"1998-1-20"', # non-padded month dates are not valid '"1998-01-1"', # non-padded day dates are not valid '"1998-13-01"', # invalid month - pytest.param('"1998-04-31"', marks=pytest.mark.xfail(reason="number of days not yet tied to month")), # invalid month-day combination - pytest.param('"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard")), # 2021 is not a leap year + pytest.param( + '"1998-04-31"', + marks=pytest.mark.xfail(reason="number of days not yet tied to month"), + ), # invalid month-day combination + pytest.param( + '"2021-02-29"', marks=pytest.mark.xfail(reason="leap days are hard") + ), # 2021 is not a leap year '"1963-06-1\\u09ea"', # invalid non-ASCII '৪' (a Bengali 4) '"20230328"', # 
ISO8601 / non-RFC3339: YYYYMMDD without dashes (2023-03-28) '"2023-W01"', # ISO8601 / non-RFC3339: week number implicit day of week (2023-01-02) @@ -137,6 +161,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) + @pytest.mark.xfail(reason="idn-hostname format not implemented") class TestIdnHostname: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"idn-hostname"}' @@ -300,6 +325,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) + @pytest.mark.xfail(reason="iri-reference format is not yet implemented") class TestIriReference: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"iri-reference"}' @@ -489,20 +515,40 @@ def test_good(self, target_str): '"008:030:006Z"', # invalid time string with extra leading zeros '"8:3:6Z"', # invalid time string with no leading zero for single digit '"8:0030:6Z"', # hour, minute, second must be two digits - pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong hour) - pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, Zulu (wrong minute) - pytest.param('"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong hour) - pytest.param('"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, zero time-offset (wrong minute) - pytest.param('"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong hour) - pytest.param('"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, positive time-offset (wrong minute) - pytest.param('"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap 
seconds are hard")), # invalid leap second, negative time-offset (wrong hour) - pytest.param('"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # invalid leap second, negative time-offset (wrong minute) + pytest.param( + '"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, Zulu (wrong hour) + pytest.param( + '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, Zulu (wrong minute) + pytest.param( + '"22:59:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, zero time-offset (wrong hour) + pytest.param( + '"23:58:60+00:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, zero time-offset (wrong minute) + pytest.param( + '"23:59:60+01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, positive time-offset (wrong hour) + pytest.param( + '"23:59:60+00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, positive time-offset (wrong minute) + pytest.param( + '"23:59:60-01:00"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, negative time-offset (wrong hour) + pytest.param( + '"23:59:60-00:30"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # invalid leap second, negative time-offset (wrong minute) '"08:30:06-8:000"', # hour, minute in time-offset must be two digits '"24:00:00Z"', # an invalid time string with invalid hour '"00:60:00Z"', # an invalid time string with invalid minute '"00:00:61Z"', # an invalid time string with invalid second - pytest.param('"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong hour) - pytest.param('"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid time string with invalid leap second (wrong minute) + 
pytest.param( + '"22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid time string with invalid leap second (wrong hour) + pytest.param( + '"23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid time string with invalid leap second (wrong minute) '"01:02:03+24:00"', # an invalid time string with invalid time numoffset hour '"01:02:03+00:60"', # an invalid time string with invalid time numoffset minute '"01:02:03Z+00:30"', # an invalid time string with invalid time with both Z and numoffset @@ -538,11 +584,23 @@ class TestIpv6: '"::42:ff:1"', # leading colons is valid '"d6::"', # trailing colons is valid '"1:d6::42"', # single set of double colons in the middle is valid - pytest.param('"1::d6:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with the ipv4 section as decimal octets - pytest.param('"1:2::192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with double colons between the sections - pytest.param('"::ffff:192.168.0.1"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # mixed format with leading double colons (ipv4-mapped ipv6 address) + pytest.param( + '"1::d6:192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with the ipv4 section as decimal octets + pytest.param( + '"1:2::192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with double colons between the sections + pytest.param( + '"::ffff:192.168.0.1"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # mixed format with leading double colons (ipv4-mapped ipv6 address) '"1:2:3:4:5:6:7:8"', # 8 octets - pytest.param('"1000:1000:1000:1000:1000:1000:255.255.255.255"', marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented")), # a long valid ipv6 + pytest.param( + 
'"1000:1000:1000:1000:1000:1000:255.255.255.255"', + marks=pytest.mark.xfail(reason="Mixed format IPv6 not implemented"), + ), # a long valid ipv6 ], ) def test_good(self, target_str): @@ -710,11 +768,22 @@ class TestEmail: '"te~st@example.com"', # tilde in local part is valid '"~test@example.com"', # tilde before local part is valid '"test~@example.com"', # tilde after local part is valid - pytest.param('"\\"joe bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a space in the local part is valid - pytest.param('"\\"joe..bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a double dot in the local part is valid - pytest.param('"\\"joe@bloggs\\"@example.com"', marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part")), # a quoted string with a @ in the local part is valid + pytest.param( + '"\\"joe bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a space in the local part is valid + pytest.param( + '"\\"joe..bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a double dot in the local part is valid + pytest.param( + '"\\"joe@bloggs\\"@example.com"', + marks=pytest.mark.xfail(reason="Quoted strings not yet implemented in local part"), + ), # a quoted string with a @ in the local part is valid '"joe.bloggs@[127.0.0.1]"', # an IPv4-address-literal after the @ is valid - pytest.param('"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard")), # an IPv6-address-literal after the @ is valid + pytest.param( + '"joe.bloggs@[IPv6:::1]"', marks=pytest.mark.xfail(reason="IPv6 is hard") + ), # an IPv6-address-literal after the @ is valid '"te.s.t@example.com"', # two separated dots inside local part are 
valid '"riedgar+guidance@example.com"', # plus sign in local part is valid ], @@ -860,9 +929,16 @@ def test_good(self, target_str): "bad_str", [ '"1998-12-31T23:59:61Z"', # an invalid date-time past leap second, UTC - pytest.param('"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong minute, UTC - pytest.param('"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard")), # an invalid date-time with leap second on a wrong hour, UTC - pytest.param('"1990-02-31T15:59:59.123-08:00"', marks=pytest.mark.xfail(reason="valid days not yet tied to month")), # an invalid day in date-time string + pytest.param( + '"1998-12-31T23:58:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid date-time with leap second on a wrong minute, UTC + pytest.param( + '"1998-12-31T22:59:60Z"', marks=pytest.mark.xfail(reason="leap seconds are hard") + ), # an invalid date-time with leap second on a wrong hour, UTC + pytest.param( + '"1990-02-31T15:59:59.123-08:00"', + marks=pytest.mark.xfail(reason="valid days not yet tied to month"), + ), # an invalid day in date-time string '"1990-12-31T15:59:59-24:00"', # an invalid offset in date-time string '"1963-06-19T08:30:06.28123+01:00Z"', # an invalid closing Z after time-zone offset '"06/19/1963 08:30:06 PST"', # an invalid date-time string @@ -877,6 +953,7 @@ def test_bad(self, bad_str): schema_obj = json.loads(self.schema) check_match_failure(bad_string=bad_str, schema_obj=schema_obj) + @pytest.mark.xfail(reason="regex format not implemented") class TestRegex: schema = '{"$schema":"https://json-schema.org/draft/2020-12/schema","format":"regex"}' diff --git a/tests/unit/library/json/utils.py b/tests/unit/library/json/utils.py index d75c41d4b..5498d718c 100644 --- a/tests/unit/library/json/utils.py +++ b/tests/unit/library/json/utils.py @@ -1,6 +1,7 @@ import json from functools import partial -from json import loads as 
json_loads, dumps as json_dumps +from json import dumps as json_dumps +from json import loads as json_loads from typing import Any, Optional, Union from jsonschema import validate @@ -8,18 +9,15 @@ from guidance import json as gen_json from guidance.library._json import JSONSchema -from ....utils import check_match_failure as _check_match_failure, check_run_with_temperature, generate_and_check as _generate_and_check - -from jsonschema import validate - - -import json -from functools import partial -from json import dumps as json_dumps, loads as json_loads +from ....utils import check_match_failure as _check_match_failure +from ....utils import check_run_with_temperature +from ....utils import generate_and_check as _generate_and_check def generate_and_check( - target_obj: Any, schema_obj: Union[str, JSONSchema], desired_temperature: Optional[float] = None + target_obj: Any, + schema_obj: Union[str, JSONSchema], + desired_temperature: Optional[float] = None, ): if isinstance(schema_obj, str): schema_obj = json_loads(schema_obj) @@ -32,9 +30,7 @@ def generate_and_check( # Now test that the grammar can recognize and generate prepared_json # We partial in the grammar_callable if desired_temperature is not None: - grammar_callable = partial( - gen_json, schema=schema_obj, temperature=desired_temperature - ) + grammar_callable = partial(gen_json, schema=schema_obj, temperature=desired_temperature) else: grammar_callable = partial(gen_json, schema=schema_obj) @@ -61,4 +57,4 @@ def check_match_failure( failure_byte=failure_byte, allowed_bytes=allowed_bytes, grammar=grammar, - ) \ No newline at end of file + ) From 1295affed69973c3a239063f29390538fb68b3d5 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 13:48:25 -0800 Subject: [PATCH 61/70] refactor allOf contents out to reduce_schema --- guidance/library/_json.py | 25 ++-- guidance/library/_json_normalization copy.py | 94 ++++++++++++++ guidance/library/_json_normalization.py | 121 +++++++++++++++++++ 3 
files changed, 230 insertions(+), 10 deletions(-) create mode 100644 guidance/library/_json_normalization copy.py create mode 100644 guidance/library/_json_normalization.py diff --git a/guidance/library/_json.py b/guidance/library/_json.py index dc2320bce..50eea0a01 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -778,14 +778,8 @@ def oneOf( warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") return lm + self.anyOf(anyof_list=oneof_list, base_uri=base_uri) - @guidance(stateless=True) - def allOf( - self, - lm, - *, - parent_schema: JSONSchema, - base_uri: str, - ): + + def reduce_schema(self, orig_schema: dict[str, Any], base_uri: str) -> dict[str, Any]: types: list[set[str]] = [] properties: defaultdict[str, list[JSONSchema]] = defaultdict(list) required: dict[str, None] = dict() # use a dict for ordered-set behavior @@ -909,7 +903,7 @@ def add_schema(schema: JSONSchema, base_uri: str): continue handle_keyword(key, value, schema, base_uri) - add_schema(parent_schema, base_uri) + add_schema(orig_schema, base_uri) combined_schema: dict[str, Any] = {} @@ -1002,8 +996,19 @@ def reduce_types(type_a: set[str], type_b: set[str]) -> set[str]: assert not set(combined_schema) & set(other_data) combined_schema.update(other_data) + return combined_schema - return lm + self.json(json_schema=combined_schema, base_uri=base_uri) + + @guidance(stateless=True) + def allOf( + self, + lm, + *, + parent_schema: JSONSchema, + base_uri: str, + ): + reduced_schema = self.reduce_schema(parent_schema, base_uri) + return lm + self.json(json_schema=reduced_schema, base_uri=base_uri) @guidance(stateless=True) diff --git a/guidance/library/_json_normalization copy.py b/guidance/library/_json_normalization copy.py new file mode 100644 index 000000000..12c9f29c3 --- /dev/null +++ b/guidance/library/_json_normalization copy.py @@ -0,0 +1,94 @@ +from __future__ import annotations +from typing import Any, 
Optional, TypedDict, cast, NamedTuple +from itertools import product + +from typing import TypedDict, List, Union, Any, Dict + +# Unnormalized Schema Definitions + +class BaseSchema(TypedDict, total=False): + type: str + properties: Dict[str, Any] + items: Any + required: List[str] + enum: List[Any] + const: Any + minimum: int + maximum: int + minLength: int + maxLength: int + pattern: str + # Other schema keywords can be added here + +class Schema(BaseSchema): + allOf: List[Schema] + anyOf: List[Schema] + oneOf: List[Schema] + +# Normalized Schema Definitions + +class NormalizedAllOfSchema(BaseSchema): + allOf: List[BaseSchema] + +class NormalizedAnyOfSchema(TypedDict): + anyOf: List[Union[NormalizedAllOfSchema, BaseSchema]] + +class NormalizedOneOfSchema(TypedDict): + oneOf: List[Union[NormalizedAllOfSchema, BaseSchema]] + +# The NormalizedSchema can be a NormalizedBaseSchema or top-level combinators without nesting +NormalizedSchema = Union[BaseSchema, NormalizedAllOfSchema, NormalizedAnyOfSchema, NormalizedOneOfSchema] + +class Combinators(NamedTuple): + allOf: List[Schema] + anyOf: List[Schema] + oneOf: List[Schema] + +def maybe_allOf(nodes: list[BaseSchema], siblings: Optional[BaseSchema] = None) -> NormalizedSchema: + if len(nodes) == 1 and not siblings: + return nodes[0] + if siblings: + return {"allOf": [*nodes, siblings]} + return {"allOf": nodes} + +def get_combinators_and_siblings(node: Schema) -> tuple[Combinators, BaseSchema]: + allOf = cast(list[Schema], node.pop("allOf", [])) + oneOf = cast(list[Schema], node.pop("oneOf", [])) + anyOf = cast(list[Schema], node.pop("anyOf", [])) + siblings = node + return Combinators(allOf, oneOf, anyOf), siblings + +def normalize(orig_node: Schema) -> NormalizedSchema: + ((allOf_list, oneOf_list, anyOf_list), siblings) = get_combinators_and_siblings(orig_node) + if not allOf_list and not oneOf_list and not anyOf_list: + return siblings + + allOf_list = normalize_allOf(allOf_list, siblings) + anyOf_list = 
normalize_oneOf_anyOf(anyOf_list) + + if oneOf_list and anyOf_list: + node: NormalizedOneOfSchema = { + "oneOf": [ + maybe_allOf([oneOf_item, anyOf_item, *allOf_list]) + for anyOf_item in anyOf_list + for oneOf_item in oneOf_list + ] + } + elif oneOf_list: + node: NormalizedOneOfSchema = { + "oneOf": [ + maybe_allOf([oneOf_item, *allOf_list]) + for oneOf_item in oneOf_list + ] + } + elif anyOf_list: + node: NormalizedAnyOfSchema = { + "anyOf": [ + maybe_allOf([anyOf_item, *allOf_list]) + for anyOf_item in anyOf_list + ] + } + elif allOf_list: + node: NormalizedSchema = maybe_allOf(allOf_list) + + return node \ No newline at end of file diff --git a/guidance/library/_json_normalization.py b/guidance/library/_json_normalization.py new file mode 100644 index 000000000..448f2ea8b --- /dev/null +++ b/guidance/library/_json_normalization.py @@ -0,0 +1,121 @@ +from __future__ import annotations +from typing import Any, Optional +from itertools import product + + +def normalize_allOf(subnodes: list[dict[str, Any]], siblings: dict[str, Any] = {}) -> dict[str, Any]: + if not subnodes: + return siblings + + # Normalization will ensure that there are no applicable "anyOf" or "oneOf" keys + # except at the top level of the schema + subnodes = [normalize(node) for node in subnodes] + groups = [] + if any("oneOf" in node for node in subnodes): + # Binds more tightly than anyOf + kind = "oneOf" + elif any("anyOf" in node for node in subnodes): + kind = "anyOf" + else: + # We are done + return maybe_allOf(subnodes, siblings) + + other = [] + if siblings: + other.append(siblings) + + for node in subnodes: + if "oneOf" in node and "anyOf" in node: + oneOf_list = node.pop("oneOf") + anyOf_list = node.pop("anyOf") + groups.append(list(product(oneOf_list, anyOf_list))) + + elif "oneOf" in node: + oneOf_list = node.pop("oneOf") + groups.append(oneOf_list) + + elif "anyOf" in node: + anyOf_list = node.pop("anyOf") + groups.append(anyOf_list) + + if "allOf" in node: + 
other.extend(node.pop("allOf")) + + if node: + # If there are any keys left, they need to end up in every allOf + other.append(node) + + return { + kind: [ + maybe_allOf([*item, *other]) + for item in product(*groups) + ] + } + +def maybe_allOf(nodes: list[dict[str, Any]], siblings: Optional[dict[str, Any]] = None) -> dict[str, Any]: + if len(nodes) == 1 and not siblings: + return nodes[0] + if siblings: + return {"allOf": [*nodes, siblings]} + return {"allOf": nodes} + + +def normalize(node: dict[str, Any]) -> dict[str, Any]: + node = normalize_allOf(node.pop("allOf", []), node) + oneOf_list = node.pop("oneOf", []) + anyOf_list = node.pop("anyOf", []) + allOf_list = node.pop("allOf", []) + + if oneOf_list and anyOf_list: + node = { + "oneOf": [ + maybe_allOf([oneOf_item, anyOf_item, *allOf_list], node) + for anyOf_item in anyOf_list + for oneOf_item in oneOf_list + ] + } + elif oneOf_list: + node = { + "oneOf": [ + maybe_allOf([oneOf_item, *allOf_list], node) + for oneOf_item in oneOf_list + ] + } + elif anyOf_list: + node = { + "anyOf": [ + maybe_allOf([anyOf_item, *allOf_list], node) + for anyOf_item in anyOf_list + ] + } + elif allOf_list: + node = maybe_allOf([node, *allOf_list]) + return node + +def normalize_oneOf_anyOf(node: dict[str, Any]) -> dict[str, Any]: + oneOf_list = node.pop("oneOf", []) + anyOf_list = node.pop("anyOf", []) + allOf_list = node.pop("allOf", []) + + if oneOf_list and anyOf_list: + node = { + "oneOf": [ + maybe_allOf([oneOf_item, anyOf_item, *allOf_list], node) + for anyOf_item in anyOf_list + for oneOf_item in oneOf_list + ] + } + elif oneOf_list: + node = { + "oneOf": [ + maybe_allOf([oneOf_item, *allOf_list], node) + for oneOf_item in oneOf_list + ] + } + elif anyOf_list: + node = { + "anyOf": [ + maybe_allOf([anyOf_item, *allOf_list], node) + for anyOf_item in anyOf_list + ] + } From 67fd5e5a55355bb5e7023316fff2e2837838173f Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 15:22:58 -0800 Subject: [PATCH 62/70] 
refactor sibling handling into push_sibling_keys --- guidance/library/_json.py | 137 ++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 74 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 50eea0a01..208b6dcfc 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -778,6 +778,59 @@ def oneOf( warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") return lm + self.anyOf(anyof_list=oneof_list, base_uri=base_uri) + def push_sibling_keys(self, json_schema: JSONSchema) -> JSONSchema: + """ + If sibling keys are present next to anyOf, oneOf, or $ref, we push them down into an allOf. + """ + parent_schema = json_schema.copy() + anyof_list = parent_schema.pop(Keyword.ANYOF, []) + oneof_list = parent_schema.pop(Keyword.ONEOF, []) + allof_list = parent_schema.pop(Keyword.ALLOF, []) + ref = parent_schema.pop(Keyword.REF, None) + + common = [] + if VALID_KEYS.intersection(parent_schema) - set(IGNORED_KEYS): + # If there are any sibling keys, we need to push them down into an allOf + common.append(parent_schema) + if allof_list: + common.extend(allof_list) + if ref: + # TODO: $id / base_uri? 
+ common.append({Keyword.REF: ref}) + + if anyof_list and oneof_list: + return { + "oneOf": [ + {"allOf": common + [one_item, any_item]} + for one_item in oneof_list + for any_item in anyof_list + ], + } + + if oneof_list: + if not common: + return {"oneOf": oneof_list} + return { + "oneOf": [ + {"allOf": common + [one_item]} + for one_item in oneof_list + ], + } + + if anyof_list: + if not common: + return {"anyOf": anyof_list} + return { + "anyOf": [ + {"allOf": common + [any_item]} + for any_item in anyof_list + ], + } + + if len(common) == 1: + return common[0] + + return {"allOf": common} def reduce_schema(self, orig_schema: dict[str, Any], base_uri: str) -> dict[str, Any]: types: list[set[str]] = [] @@ -1130,96 +1183,32 @@ def json( if json_schema == {}: return lm + self.any() - validate_json_node_keys(json_schema) - if Keyword.ID in json_schema: # "cd" into the new base_uri base_uri = urijoin(base_uri, json_schema[Keyword.ID]) - if Keyword.ALLOF in json_schema and Keyword.ANYOF in json_schema: - parent_schema = json_schema.copy() - anyof_list = parent_schema.pop(Keyword.ANYOF) - allof_list = parent_schema.pop(Keyword.ALLOF) - # Reduce the problem to an anyOf of allOfs - return lm + self.anyOf( - anyof_list=[ - {"allOf": [any_item, *allof_list], **parent_schema} - for any_item in anyof_list - ], - base_uri=base_uri, - ) - - if Keyword.ALLOF in json_schema and Keyword.ONEOF in json_schema: - parent_schema = json_schema.copy() - allof_list = parent_schema.pop(Keyword.ALLOF) - oneof_list = parent_schema.pop(Keyword.ONEOF) - # Reduce the problem to a oneOf of allOfs - return lm + self.oneOf( - oneof_list=[ - {"allOf": [one_item, *allof_list], **parent_schema} - for one_item in oneof_list - ], - base_uri=base_uri, - ) - - if Keyword.ANYOF in json_schema and Keyword.ONEOF in json_schema: - parent_schema = json_schema.copy() - anyof_list = parent_schema.pop(Keyword.ANYOF) - oneof_list = parent_schema.pop(Keyword.ONEOF) - assert Keyword.ALLOF not in parent_schema - # 
Reduce the problem to a oneOf of allOfs - return lm + self.oneOf( - oneof_list=[ - {"allOf": [one_item, any_item], **parent_schema} - for any_item in anyof_list - for one_item in oneof_list - ], - base_uri=base_uri, - ) + validate_json_node_keys(json_schema) + json_schema = self.push_sibling_keys(json_schema) if Keyword.ALLOF in json_schema: + sibling_keys = get_sibling_keys(json_schema, Keyword.ALLOF) + assert not sibling_keys return lm + self.allOf(parent_schema=json_schema, base_uri=base_uri) if Keyword.ANYOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ANYOF) - if not sibling_keys: - return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF], base_uri=base_uri) - # Let the allOf function handle anyOfs with sibling keys - parent_schema = json_schema.copy() - anyof_list = parent_schema.pop(Keyword.ANYOF) - return lm + self.anyOf( - anyof_list=[ - {"allOf": [any_item], **parent_schema} - for any_item in anyof_list - ], - base_uri=base_uri, - ) + assert not sibling_keys + return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF], base_uri=base_uri) if Keyword.ONEOF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF) - if not sibling_keys: - return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF], base_uri=base_uri) - # Let the allOf function handle oneOfs with sibling keys - parent_schema = json_schema.copy() - oneof_list = parent_schema.pop(Keyword.ONEOF) - assert Keyword.ALLOF not in parent_schema - return lm + self.oneOf( - oneof_list=[ - {"allOf": [one_item], **parent_schema} - for one_item in oneof_list - ], - base_uri=base_uri, - ) + assert not sibling_keys + return lm + self.oneOf(oneof_list=json_schema[Keyword.ONEOF], base_uri=base_uri) if Keyword.REF in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.REF) - if not sibling_keys: - return lm + self.ref(reference=json_schema[Keyword.REF], base_uri=base_uri) - # Let the allOf function handle refs with sibling keys - parent_schema = 
json_schema.copy() - ref = parent_schema.pop(Keyword.REF) - assert Keyword.ALLOF not in parent_schema - return lm + self.allOf(parent_schema={"allOf": [{Keyword.REF: ref}], **parent_schema}, base_uri=base_uri) + assert not sibling_keys + return lm + self.ref(reference=json_schema[Keyword.REF], base_uri=base_uri) if Keyword.CONST in json_schema: sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM} From e4ff3aa37eccb5869069d38f3dd2d5edce93b465 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 15:23:49 -0800 Subject: [PATCH 63/70] drop unnecessary cd-ing --- guidance/library/_json.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 208b6dcfc..5dc992c67 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1183,9 +1183,6 @@ def json( if json_schema == {}: return lm + self.any() - if Keyword.ID in json_schema: - # "cd" into the new base_uri - base_uri = urijoin(base_uri, json_schema[Keyword.ID]) validate_json_node_keys(json_schema) json_schema = self.push_sibling_keys(json_schema) From b989a0390ad533e3367c2dcd3e8b8f221a17cbb9 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 15:42:41 -0800 Subject: [PATCH 64/70] reorder properties in test cases to be consistent with the order we validate (now prioritizing base schema) --- tests/unit/library/json/test_allOf.py | 6 +++--- tests/unit/library/json/test_refs.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py index 261f40345..2802a858b 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -373,10 +373,10 @@ def test_allOf_combined_with_anyOf_oneOf(self, test_object, valid): # additionalProperties in parent schema { "allOf": [ - {"properties": {"foo": {"maximum": 4}}, "additionalProperties": {"minimum": 5}} + 
{"properties": {"bar": {"maximum": 5}}, "additionalProperties": {"type": ["integer", "null"]}} ], - "properties": {"bar": {"maximum": 5}}, - "additionalProperties": {"type": ["integer", "null"]}, + "properties": {"foo": {"maximum": 4}}, + "additionalProperties": {"minimum": 5}, }, # additionalProperties in allOf { diff --git a/tests/unit/library/json/test_refs.py b/tests/unit/library/json/test_refs.py index f2248129d..db695cc7e 100644 --- a/tests/unit/library/json/test_refs.py +++ b/tests/unit/library/json/test_refs.py @@ -437,14 +437,14 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( ["test_object", "valid"], [ # invalid on inner field - ({"bar": "a", "foo": {"bar": 1}}, False), + ({"foo": {"bar": 1}, "bar": "a"}, False), # invalid on outer field - ({"bar": 1, "foo": {"bar": "a"}}, False), + ({"foo": {"bar": "a"}, "bar": 1}, False), # valid on both fields ( { - "bar": "a", "foo": {"bar": "a"}, + "bar": "a", }, True, ), @@ -475,11 +475,11 @@ def test_refs_with_relative_uris_and_defs(self, test_object, valid): ["test_object", "valid"], [ # invalid on inner field - ({"bar": "a", "foo": {"bar": 1}}, False), + ({"foo": {"bar": 1}, "bar": "a"}, False), # invalid on outer field - ({"bar": 1, "foo": {"bar": "a"}}, False), + ({"foo": {"bar": "a"}, "bar": 1}, False), # valid on both fields - ({"bar": "a", "foo": {"bar": "a"}}, True), + ({"foo": {"bar": "a"}, "bar": "a"}, True), ], ) def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): From 241c47e5c36b2a8173e146bfb672e156dd9dbfa4 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 17:52:57 -0800 Subject: [PATCH 65/70] simplify enum and const validation --- guidance/library/_json.py | 49 ++++++++++------------------ tests/unit/library/json/test_json.py | 8 ++--- 2 files changed, 22 insertions(+), 35 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 5dc992c67..06a9adaed 100644 --- a/guidance/library/_json.py +++ 
b/guidance/library/_json.py @@ -1070,23 +1070,15 @@ def const( lm, *, value: Union[None, bool, int, float, str, Mapping, Sequence], - instance_type: Optional[Union[str, Sequence[str]]] = None, - enum: Optional[Sequence[Union[None, bool, int, float, str, Mapping, Sequence]]] = None, + parent_schema: JSONSchema, ): - schema_to_validate_against: dict[str, Any] = {} - if instance_type is not None: - schema_to_validate_against["type"] = instance_type - if enum is not None: - schema_to_validate_against["enum"] = enum - if schema_to_validate_against: - # Raise a validation error if the value doesn't match the type - try: - jsonschema.validate( - instance=value, - schema=schema_to_validate_against, - ) - except jsonschema.ValidationError as e: - raise UnsatisfiableSchemaError(f"const {value!r} does not match schema {schema_to_validate_against}") from e + try: + jsonschema.validate( + instance=value, + schema=parent_schema, + ) + except jsonschema.ValidationError as e: + raise UnsatisfiableSchemaError(f"const {value!r} is inconsistent with parent schema: {parent_schema}") from e # Base case if isinstance(value, (type(None), bool, int, float, str)): return lm + json_dumps(value) @@ -1122,19 +1114,19 @@ def enum( lm, *, options: Sequence[Union[None, bool, int, float, str, Mapping, Sequence]], - instance_type: Optional[Union[str, Sequence[str]]] = None, + parent_schema: JSONSchema, ): if not options: raise UnsatisfiableSchemaError("enum has no options") all_opts: list[GrammarFunction] = [] for instance in options: try: - grm = self.const(value=instance, instance_type=instance_type) + grm = self.const(value=instance, parent_schema=parent_schema) except UnsatisfiableSchemaError: continue all_opts.append(grm) if not all_opts: - raise ValueError(f"No valid options found for enum with type {instance_type!r}: {options}") + raise UnsatisfiableSchemaError(f"All enum options {options} are inconsistent with parent schema: {parent_schema}") return lm + select(options=all_opts) @@ -1183,7 
+1175,14 @@ def json( if json_schema == {}: return lm + self.any() + # Early exit for simple cases + if Keyword.CONST in json_schema: + return lm + self.const(value=json_schema[Keyword.CONST], parent_schema=json_schema) + if Keyword.ENUM in json_schema: + return lm + self.enum(options=json_schema[Keyword.ENUM], parent_schema=json_schema) + + # More complex cases; validation needed validate_json_node_keys(json_schema) json_schema = self.push_sibling_keys(json_schema) @@ -1207,18 +1206,6 @@ def json( assert not sibling_keys return lm + self.ref(reference=json_schema[Keyword.REF], base_uri=base_uri) - if Keyword.CONST in json_schema: - sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM} - if sibling_keys: - raise NotImplementedError(f"const with sibling keys is not yet supported. Got {sibling_keys}") - return lm + self.const(value=json_schema[Keyword.CONST], instance_type=json_schema.get(Keyword.TYPE, None), enum=json_schema.get(Keyword.ENUM, None)) - - if Keyword.ENUM in json_schema: - sibling_keys = get_sibling_keys(json_schema, Keyword.ENUM) - {Keyword.TYPE} - if sibling_keys: - raise NotImplementedError(f"enum with sibling keys is not yet supported. 
Got {sibling_keys}") - return lm + self.enum(options=json_schema[Keyword.ENUM], instance_type=json_schema.get(Keyword.TYPE, None)) - if Keyword.TYPE in json_schema and isinstance(json_schema[Keyword.TYPE], str): target_type = json_schema[Keyword.TYPE] if target_type == JSONType.NULL: diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py index 4f9b07597..c276d7dfc 100644 --- a/tests/unit/library/json/test_json.py +++ b/tests/unit/library/json/test_json.py @@ -1527,7 +1527,7 @@ def test_invalid_typed_enum(self): schema_obj = {"enum": [1, "2"], "type": "boolean"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) - assert ve.value.args[0] == "No valid options found for enum with type 'boolean': [1, '2']" + assert ve.value.args[0] == f"All enum options {[1, '2']} are inconsistent with parent schema: {schema_obj}" class TestConst: @@ -1598,7 +1598,7 @@ def test_invalid_typed_const(self): schema_obj = {"const": 1, "type": "boolean"} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) - assert ve.value.args[0] == "const 1 does not match schema {'type': 'boolean'}" + assert ve.value.args[0] == f"const {1!r} is inconsistent with parent schema: {schema_obj}" def test_valid_enum_const(self): schema_obj = {"const": 1, "enum": [1, 2, 3]} @@ -1610,7 +1610,7 @@ def test_invalid_enum_const(self): schema_obj = {"const": 1, "enum": [2, 3]} with pytest.raises(ValueError) as ve: gen_json(schema=schema_obj) - assert ve.value.args[0] == "const 1 does not match schema {'enum': [2, 3]}" + assert ve.value.args[0] == f"const {1!r} is inconsistent with parent schema: {schema_obj}" def test_valid_typed_enum_const(self): schema_obj = {"const": 1, "enum": [1, "2", 3], "type": "integer"} @@ -1632,7 +1632,7 @@ def test_invalid_typed_enum_const(self, const): gen_json(schema=schema_obj) assert ( ve.value.args[0] - == f"const {const!r} does not match schema {{'type': 'integer', 'enum': [1, '2', 3]}}" + == f"const {const!r} is 
inconsistent with parent schema: {schema_obj}" ) From 134410c52cc04aac606bbe082551a73f4cbcb0aa Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 18:01:28 -0800 Subject: [PATCH 66/70] mypy --- guidance/library/_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 06a9adaed..932658a44 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -778,7 +778,7 @@ def oneOf( warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.") return lm + self.anyOf(anyof_list=oneof_list, base_uri=base_uri) - def push_sibling_keys(self, json_schema: JSONSchema) -> JSONSchema: + def push_sibling_keys(self, json_schema: dict[str, Any]) -> dict[str, Any]: """ If sibling keys are present next to anyOf, oneOf, or $ref, we push them down into an allOf. """ @@ -1057,7 +1057,7 @@ def allOf( self, lm, *, - parent_schema: JSONSchema, + parent_schema: dict[str, Any], base_uri: str, ): reduced_schema = self.reduce_schema(parent_schema, base_uri) From 5d1c98618e87259ebee74ff7f181c31857970794 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 5 Nov 2024 18:01:54 -0800 Subject: [PATCH 67/70] delete files accidentally committed --- guidance/library/_json_normalization copy.py | 94 -------------- guidance/library/_json_normalization.py | 121 ------------------- 2 files changed, 215 deletions(-) delete mode 100644 guidance/library/_json_normalization copy.py delete mode 100644 guidance/library/_json_normalization.py diff --git a/guidance/library/_json_normalization copy.py b/guidance/library/_json_normalization copy.py deleted file mode 100644 index 12c9f29c3..000000000 --- a/guidance/library/_json_normalization copy.py +++ /dev/null @@ -1,94 +0,0 @@ -from __future__ import annotations -from typing import Any, Optional, TypedDict, cast, NamedTuple -from itertools import product - -from typing import TypedDict, List, Union, 
Any, Dict - -# Unnormalized Schema Definitions - -class BaseSchema(TypedDict, total=False): - type: str - properties: Dict[str, Any] - items: Any - required: List[str] - enum: List[Any] - const: Any - minimum: int - maximum: int - minLength: int - maxLength: int - pattern: str - # Other schema keywords can be added here - -class Schema(BaseSchema): - allOf: List[Schema] - anyOf: List[Schema] - oneOf: List[Schema] - -# Normalized Schema Definitions - -class NormalizedAllOfSchema(BaseSchema): - allOf: List[BaseSchema] - -class NormalizedAnyOfSchema(TypedDict): - anyOf: List[Union[NormalizedAllOfSchema, BaseSchema]] - -class NormalizedOneOfSchema(TypedDict): - oneOf: List[Union[NormalizedAllOfSchema, BaseSchema]] - -# The NormalizedSchema can be a NormalizedBaseSchema or top-level combinators without nesting -NormalizedSchema = Union[BaseSchema, NormalizedAllOfSchema, NormalizedAnyOfSchema, NormalizedOneOfSchema] - -class Combinators(NamedTuple): - allOf: List[Schema] - anyOf: List[Schema] - oneOf: List[Schema] - -def maybe_allOf(nodes: list[BaseSchema], siblings: Optional[BaseSchema] = None) -> NormalizedSchema: - if len(nodes) == 1 and not siblings: - return nodes[0] - if siblings: - return {"allOf": [*nodes, siblings]} - return {"allOf": nodes} - -def get_combinators_and_siblings(node: Schema) -> tuple[Combinators, BaseSchema]: - allOf = cast(list[Schema], node.pop("allOf", [])) - oneOf = cast(list[Schema], node.pop("oneOf", [])) - anyOf = cast(list[Schema], node.pop("anyOf", [])) - siblings = node - return Combinators(allOf, oneOf, anyOf), siblings - -def normalize(orig_node: Schema) -> NormalizedSchema: - ((allOf_list, oneOf_list, anyOf_list), siblings) = get_combinators_and_siblings(orig_node) - if not allOf_list and not oneOf_list and not anyOf_list: - return siblings - - allOf_list = normalize_allOf(allOf_list, siblings) - anyOf_list = normalize_oneOf_anyOf(anyOf_list) - - if oneOf_list and anyOf_list: - node: NormalizedOneOfSchema = { - "oneOf": [ - 
maybe_allOf([oneOf_item, anyOf_item, *allOf_list]) - for anyOf_item in anyOf_list - for oneOf_item in oneOf_list - ] - } - elif oneOf_list: - node: NormalizedOneOfSchema = { - "oneOf": [ - maybe_allOf([oneOf_item, *allOf_list]) - for oneOf_item in oneOf_list - ] - } - elif anyOf_list: - node: NormalizedAnyOfSchema = { - "anyOf": [ - maybe_allOf([anyOf_item, *allOf_list]) - for anyOf_item in anyOf_list - ] - } - elif allOf_list: - node: NormalizedSchema = maybe_allOf(allOf_list) - - return node \ No newline at end of file diff --git a/guidance/library/_json_normalization.py b/guidance/library/_json_normalization.py deleted file mode 100644 index 448f2ea8b..000000000 --- a/guidance/library/_json_normalization.py +++ /dev/null @@ -1,121 +0,0 @@ -from __future__ import annotations -from typing import Any, Optional -from itertools import product - - -def normalize_allOf(subnodes: list[dict[str, Any]], siblings: dict[str, Any] = {}) -> dict[str, Any]: - if not subnodes: - return siblings - - # Normalization will ensure that there are no applicable "anyOf" or "oneOf" keys - # except at the top level of the schema - subnodes = [normalize(node) for node in subnodes] - groups = [] - if any("oneOf" in node for node in subnodes): - # Binds more tightly than anyOf - kind = "oneOf" - elif any("anyOf" in node for node in subnodes): - kind = "anyOf" - else: - # We are done - return maybe_allOf(subnodes, siblings) - - other = [] - if siblings: - other.append(siblings) - - for node in subnodes: - if "oneOf" in node and "anyOf" in node: - oneOf_list = node.pop("oneOf") - anyOf_list = node.pop("anyOf") - groups.append(list(product(oneOf_list, anyOf_list))) - - elif "oneOf" in node: - oneOf_list = node.pop("oneOf") - groups.append(oneOf_list) - - elif "anyOf" in node: - anyOf_list = node.pop("anyOf") - groups.append(anyOf_list) - - if "allOf" in node: - other.extend(node.pop("allOf")) - - if node: - # If there are any keys left, they need to end up in every allOf - other.append(node) - 
- return { - kind: [ - maybe_allOf([*item, *other]) - for item in product(*groups) - ] - } - -def maybe_allOf(nodes: list[dict[str, Any]], siblings: Optional[dict[str, Any]] = None) -> dict[str, Any]: - if len(nodes) == 1 and not siblings: - return nodes[0] - if siblings: - return {"allOf": [*nodes, siblings]} - return {"allOf": nodes} - - -def normalize(node: dict[str, Any]) -> dict[str, Any]: - node = normalize_allOf(node.pop("allOf", []), node) - oneOf_list = node.pop("oneOf", []) - anyOf_list = node.pop("anyOf", []) - allOf_list = node.pop("allOf", []) - - if oneOf_list and anyOf_list: - node = { - "oneOf": [ - maybe_allOf([oneOf_item, anyOf_item, *allOf_list], node) - for anyOf_item in anyOf_list - for oneOf_item in oneOf_list - ] - } - elif oneOf_list: - node = { - "oneOf": [ - maybe_allOf([oneOf_item, *allOf_list], node) - for oneOf_item in oneOf_list - ] - } - elif anyOf_list: - node = { - "anyOf": [ - maybe_allOf([anyOf_item, *allOf_list], node) - for anyOf_item in anyOf_list - ] - } - elif allOf_list: - node = maybe_allOf([node, *allOf_list]) - return node - -def normalize_oneOf_anyOf(node: dict[str, Any]) -> dict[str, Any]: - oneOf_list = node.pop("oneOf", []) - anyOf_list = node.pop("anyOf", []) - allOf_list = node.pop("allOf", []) - - if oneOf_list and anyOf_list: - node = { - "oneOf": [ - maybe_allOf([oneOf_item, anyOf_item, *allOf_list], node) - for anyOf_item in anyOf_list - for oneOf_item in oneOf_list - ] - } - elif oneOf_list: - node = { - "oneOf": [ - maybe_allOf([oneOf_item, *allOf_list], node) - for oneOf_item in oneOf_list - ] - } - elif anyOf_list: - node = { - "anyOf": [ - maybe_allOf([anyOf_item, *allOf_list], node) - for anyOf_item in anyOf_list - ] - } From 156728d04da9c941317e041d5f80a9513ee917af Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 6 Nov 2024 10:20:28 -0800 Subject: [PATCH 68/70] a few extra test cases --- tests/unit/library/json/test_allOf.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/tests/unit/library/json/test_allOf.py b/tests/unit/library/json/test_allOf.py index 2802a858b..e27d93940 100644 --- a/tests/unit/library/json/test_allOf.py +++ b/tests/unit/library/json/test_allOf.py @@ -80,6 +80,8 @@ def test_allOf_with_base_schema(self, test_object, valid): (25, True), # mismatch one (35, False), + # mismatch other + (15, False), ], ) def test_allOf_simple_types(self, test_object, valid): @@ -98,6 +100,8 @@ def test_allOf_simple_types(self, test_object, valid): @pytest.mark.parametrize( ["test_object", "valid"], [ + # mismatch both + (15, False), # mismatch one (25, False), # valid @@ -120,6 +124,8 @@ def test_allOf_simple_minimum(self, test_object, valid): @pytest.mark.parametrize( ["test_object", "valid"], [ + # mismatch both + (35, False), # mismatch one (25, False), # valid From 68900ed3a84016c99389134b17fcc30a4bfe0ac0 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 6 Nov 2024 10:25:56 -0800 Subject: [PATCH 69/70] more explicit NotImplementedError for oneOf, anyOf --- guidance/library/_json.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 932658a44..c01122edf 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -844,7 +844,18 @@ def reduce_schema(self, orig_schema: dict[str, Any], base_uri: str) -> dict[str, consts: list[Any] = [] def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri: str): - if key == Keyword.REF: + if key == Keyword.ANYOF: + raise NotImplementedError("anyOf in allOf not yet supported") + + elif key == Keyword.ONEOF: + raise NotImplementedError("oneOf in allOf not yet supported") + + elif key == Keyword.ALLOF: + value = cast(Sequence[JSONSchema], value) + for schema in value: + add_schema(schema, base_uri) + + elif key == Keyword.REF: ref = cast(str, value) abspath = urijoin(base_uri, ref) resolved = self._resolver.lookup(abspath) @@ -865,11 +876,6 @@ def 
handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri value_set = set(value) types.append(value_set) - elif key == Keyword.ALLOF: - value = cast(Sequence[JSONSchema], value) - for schema in value: - add_schema(schema, base_uri) - elif key == ObjectKeywords.PROPERTIES: value = cast(dict[str, JSONSchema], value) for name, schema in value.items(): @@ -914,10 +920,6 @@ def handle_keyword(key: str, value: Any, parent_schema: dict[str, Any], base_uri (value, exempt_prefix_items) ) - elif key in set(Keyword): - # If we've done our job right, we should never hit this case... - raise NotImplementedError(f"Don't yet know how to handle {key} in allOf") - elif key in other_data: if key in { NumberKeywords.MINIMUM, NumberKeywords.EXCLUSIVE_MINIMUM, From 0a63f592ba076cb85002bae35a62fb4cb0db2f80 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 6 Nov 2024 11:27:11 -0800 Subject: [PATCH 70/70] add some comments about dropped UnsatisfiableSchemaErrors --- guidance/library/_json.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index c01122edf..61019e16a 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -577,7 +577,9 @@ def object( # We get here if the schema is a literal False or is otherwise determined to be unsatisfiable if name in required: raise UnsatisfiableSchemaError(f"Required property {name!r} is unsatisfiable") from e - # Use json_dumps to properly quote / escape the key + # If the property is not required, we will just "blacklist" this key (e.g. if the schema was False) + # Note that we're just dropping this exception. + # Use json_dumps to properly quote / escape the key before adding it to the blacklist key = json_dumps(name) illegal_keys.add(key) @@ -592,6 +594,10 @@ def object( f"Required properties not in properties but additionalProperties is unsatisfiable." 
f" Missing required properties: {list(r for r in required if r not in properties)}" ) from e + else: + # If "additionalProperties" is unsatisfiable but there are no required properties that need to be validated against it, + # then we can safely ignore it. Note that this means that we are just going to drop this exception. + pass keys: list[str] = [] required_items: list[bool] = [] @@ -682,6 +688,11 @@ def array( f"prefixItems has too few elements ({len(prefix_items_schema)}) to satisfy minItems ({min_items})" f" but item schema is unsatisfiable" ) from e + else: + # If we've already satisfied min_items, we can just ignore the unsatisfiable item schema. This just means + # that we can't generate any more items after the prefix items. + # Note that this means that we are just going to drop this exception. + pass required_items = [] optional_items = [] @@ -697,8 +708,10 @@ def array( if i < min_items: raise UnsatisfiableSchemaError(f"prefixItems[{i}] is unsatisfiable but min_items is {min_items}") from e # Having an unsatisfiable prefix item is fine if we've already satisfied min_items, but this effectively sets max_items to i + # Note that this means that we are just going to drop this exception. max_items = i break + elif items_grammar is not None: item = items_grammar else: @@ -757,6 +770,8 @@ def anyOf( try: options.append(self.json(json_schema=item, base_uri=base_uri)) except UnsatisfiableSchemaError: + # No need to raise an error if one of the schemas is unsatisfiable. We'll check again at the end and raise if ALL + # schemas are unsatisfiable. Note that this means that we are just going to drop this exception. pass if not options: # Can't really point to any one schema that's unsatisfiable, so let's include all the schemas in the error message @@ -1125,6 +1140,8 @@ def enum( try: grm = self.const(value=instance, parent_schema=parent_schema) except UnsatisfiableSchemaError: + # Like anyOf, we don't want to raise an error if one of the options is unsatisfiable. 
We'll check again at the end + # and raise if ALL options are unsatisfiable. Note that this means that we are just going to drop this exception. continue all_opts.append(grm) if not all_opts: