diff --git a/src/v/pandaproxy/schema_registry/compatibility.cc b/src/v/pandaproxy/schema_registry/compatibility.cc index 0622c4e331352..958e835ba816e 100644 --- a/src/v/pandaproxy/schema_registry/compatibility.cc +++ b/src/v/pandaproxy/schema_registry/compatibility.cc @@ -148,6 +148,250 @@ ss::sstring proto_incompatibility::describe() const { fmt::arg("path", _path.string())); } +std::ostream& operator<<(std::ostream& os, const json_incompatibility_type& t) { + switch (t) { + case json_incompatibility_type::type_narrowed: + return os << "TYPE_NARROWED"; + case json_incompatibility_type::type_changed: + return os << "TYPE_CHANGED"; + case json_incompatibility_type::max_length_added: + return os << "MAX_LENGTH_ADDED"; + case json_incompatibility_type::max_length_decreased: + return os << "MAX_LENGTH_DECREASED"; + case json_incompatibility_type::min_length_added: + return os << "MIN_LENGTH_ADDED"; + case json_incompatibility_type::min_length_increased: + return os << "MIN_LENGTH_INCREASED"; + case json_incompatibility_type::pattern_added: + return os << "PATTERN_ADDED"; + case json_incompatibility_type::pattern_changed: + return os << "PATTERN_CHANGED"; + case json_incompatibility_type::maximum_added: + return os << "MAXIMUM_ADDED"; + case json_incompatibility_type::maximum_decreased: + return os << "MAXIMUM_DECREASED"; + case json_incompatibility_type::minimum_added: + return os << "MINIMUM_ADDED"; + case json_incompatibility_type::minimum_increased: + return os << "MINIMUM_INCREASED"; + case json_incompatibility_type::exclusive_maximum_added: + return os << "EXCLUSIVE_MAXIMUM_ADDED"; + case json_incompatibility_type::exclusive_maximum_decreased: + return os << "EXCLUSIVE_MAXIMUM_DECREASED"; + case json_incompatibility_type::exclusive_minimum_added: + return os << "EXCLUSIVE_MINIMUM_ADDED"; + case json_incompatibility_type::exclusive_minimum_increased: + return os << "EXCLUSIVE_MINIMUM_INCREASED"; + case json_incompatibility_type::multiple_of_added: + return os << "MULTIPLE_OF_ADDED"; + case json_incompatibility_type::multiple_of_expanded: + return os << "MULTIPLE_OF_EXPANDED"; + case json_incompatibility_type::multiple_of_changed: + return os << "MULTIPLE_OF_CHANGED"; + case json_incompatibility_type::required_attribute_added: + return os << "REQUIRED_ATTRIBUTE_ADDED"; + case json_incompatibility_type::max_properties_added: + return os << "MAX_PROPERTIES_ADDED"; + case json_incompatibility_type::max_properties_decreased: + return os << "MAX_PROPERTIES_DECREASED"; + case json_incompatibility_type::min_properties_added: + return os << "MIN_PROPERTIES_ADDED"; + case json_incompatibility_type::min_properties_increased: + return os << "MIN_PROPERTIES_INCREASED"; + case json_incompatibility_type::additional_properties_removed: + return os << "ADDITIONAL_PROPERTIES_REMOVED"; + case json_incompatibility_type::additional_properties_narrowed: + return os << "ADDITIONAL_PROPERTIES_NARROWED"; + case json_incompatibility_type::dependency_array_added: + return os << "DEPENDENCY_ARRAY_ADDED"; + case json_incompatibility_type::dependency_array_extended: + return os << "DEPENDENCY_ARRAY_EXTENDED"; + case json_incompatibility_type::dependency_array_changed: + return os << "DEPENDENCY_ARRAY_CHANGED"; + case json_incompatibility_type::dependency_schema_added: + return os << "DEPENDENCY_SCHEMA_ADDED"; + case json_incompatibility_type::property_added_to_open_content_model: + return os << "PROPERTY_ADDED_TO_OPEN_CONTENT_MODEL"; + case json_incompatibility_type:: + required_property_added_to_unopen_content_model: + return os << "REQUIRED_PROPERTY_ADDED_TO_UNOPEN_CONTENT_MODEL"; + case json_incompatibility_type::property_removed_from_closed_content_model: + return os << "PROPERTY_REMOVED_FROM_CLOSED_CONTENT_MODEL"; + case json_incompatibility_type:: + property_removed_not_covered_by_partially_open_content_model: + return os << "PROPERTY_REMOVED_NOT_COVERED_BY_PARTIALLY_OPEN_CONTENT_" + "MODEL"; + case json_incompatibility_type:: + property_added_not_covered_by_partially_open_content_model: + return os + << "PROPERTY_ADDED_NOT_COVERED_BY_PARTIALLY_OPEN_CONTENT_MODEL"; + case json_incompatibility_type::reserved_property_removed: + return os << "RESERVED_PROPERTY_REMOVED"; + case json_incompatibility_type::reserved_property_conflicts_with_property: + return os << "RESERVED_PROPERTY_CONFLICTS_WITH_PROPERTY"; + case json_incompatibility_type::max_items_added: + return os << "MAX_ITEMS_ADDED"; + case json_incompatibility_type::max_items_decreased: + return os << "MAX_ITEMS_DECREASED"; + case json_incompatibility_type::min_items_added: + return os << "MIN_ITEMS_ADDED"; + case json_incompatibility_type::min_items_increased: + return os << "MIN_ITEMS_INCREASED"; + case json_incompatibility_type::unique_items_added: + return os << "UNIQUE_ITEMS_ADDED"; + case json_incompatibility_type::additional_items_removed: + return os << "ADDITIONAL_ITEMS_REMOVED"; + case json_incompatibility_type::additional_items_narrowed: + return os << "ADDITIONAL_ITEMS_NARROWED"; + case json_incompatibility_type::item_added_to_open_content_model: + return os << "ITEM_ADDED_TO_OPEN_CONTENT_MODEL"; + case json_incompatibility_type::item_removed_from_closed_content_model: + return os << "ITEM_REMOVED_FROM_CLOSED_CONTENT_MODEL"; + case json_incompatibility_type:: + item_removed_not_covered_by_partially_open_content_model: + return os << "ITEM_REMOVED_NOT_COVERED_BY_PARTIALLY_OPEN_CONTENT_MODEL"; + case json_incompatibility_type:: + item_added_not_covered_by_partially_open_content_model: + return os << "ITEM_ADDED_NOT_COVERED_BY_PARTIALLY_OPEN_CONTENT_MODEL"; + case json_incompatibility_type::enum_array_narrowed: + return os << "ENUM_ARRAY_NARROWED"; + case json_incompatibility_type::enum_array_changed: + return os << "ENUM_ARRAY_CHANGED"; + case json_incompatibility_type::combined_type_changed: + return os << "COMBINED_TYPE_CHANGED"; + case json_incompatibility_type::product_type_extended: + return os << "PRODUCT_TYPE_EXTENDED"; + case json_incompatibility_type::sum_type_extended: + return os << "SUM_TYPE_EXTENDED"; + case json_incompatibility_type::sum_type_narrowed: + return os << "SUM_TYPE_NARROWED"; + case json_incompatibility_type::combined_type_subschemas_changed: + return os << "COMBINED_TYPE_SUBSCHEMAS_CHANGED"; + case json_incompatibility_type::not_type_extended: + return os << "NOT_TYPE_EXTENDED"; + case json_incompatibility_type::unknown: + return os << "UNKNOWN"; + } + __builtin_unreachable(); +} + +std::string_view description_for_type(json_incompatibility_type t) { + switch (t) { + case json_incompatibility_type::maximum_added: + case json_incompatibility_type::minimum_added: + case json_incompatibility_type::exclusive_maximum_added: + case json_incompatibility_type::exclusive_minimum_added: + case json_incompatibility_type::multiple_of_added: + case json_incompatibility_type::max_length_added: + case json_incompatibility_type::min_length_added: + case json_incompatibility_type::pattern_added: + case json_incompatibility_type::required_attribute_added: + case json_incompatibility_type::max_properties_added: + case json_incompatibility_type::min_properties_added: + case json_incompatibility_type::dependency_array_added: + case json_incompatibility_type::dependency_schema_added: + case json_incompatibility_type::max_items_added: + case json_incompatibility_type::min_items_added: + case json_incompatibility_type::unique_items_added: + case json_incompatibility_type::additional_items_removed: + case json_incompatibility_type::additional_properties_removed: + return "The keyword at path '{path}' in the {{reader}} schema is not " + "present in the {{writer}} schema"; + case json_incompatibility_type::min_length_increased: + case json_incompatibility_type::minimum_increased: + case json_incompatibility_type::exclusive_minimum_increased: + case json_incompatibility_type::min_properties_increased: + case json_incompatibility_type::multiple_of_expanded: + case json_incompatibility_type::min_items_increased: + return "The value at path '{path}' in the {{reader}} schema is more " + "than its value in the {{writer}} schema"; + case json_incompatibility_type::max_length_decreased: + case json_incompatibility_type::maximum_decreased: + case json_incompatibility_type::max_items_decreased: + case json_incompatibility_type::exclusive_maximum_decreased: + case json_incompatibility_type::max_properties_decreased: + return "The value at path '{path}' in the {{reader}} schema is less " + "than its value in the {{writer}} schema"; + case json_incompatibility_type::additional_items_narrowed: + case json_incompatibility_type::enum_array_narrowed: + case json_incompatibility_type::sum_type_narrowed: + case json_incompatibility_type::additional_properties_narrowed: + return "An array or combined type at path '{path}' has fewer elements " + "in the {{reader}} schema than the {{writer}} schema"; + case json_incompatibility_type::pattern_changed: + case json_incompatibility_type::multiple_of_changed: + case json_incompatibility_type::dependency_array_changed: + return "The value at path '{path}' is different between the {{reader}} " + "and {{writer}} schema"; + case json_incompatibility_type::type_changed: + case json_incompatibility_type::type_narrowed: + case json_incompatibility_type::combined_type_changed: + case json_incompatibility_type::combined_type_subschemas_changed: + case json_incompatibility_type::enum_array_changed: + return "A type at path '{path}' is different between the {{reader}} " + "schema and the {{writer}} schema"; + case json_incompatibility_type::dependency_array_extended: + case json_incompatibility_type::product_type_extended: + case json_incompatibility_type::sum_type_extended: + case json_incompatibility_type::not_type_extended: + return "An array or combined type at path '{path}' has more elements " + "in the {{reader}} schema than the {{writer}} schema"; + case json_incompatibility_type::property_added_to_open_content_model: + case json_incompatibility_type::item_added_to_open_content_model: + return "The {{reader}} schema has an open content model and has a " + "property or item at path '{path}' which is missing in the " + "{{writer}} schema"; + case json_incompatibility_type:: + required_property_added_to_unopen_content_model: + return "The {{reader}} schema has an unopen content model and has a " + "required property at path '{path}' which is missing in the " + "{{writer}} schema"; + case json_incompatibility_type::property_removed_from_closed_content_model: + case json_incompatibility_type::item_removed_from_closed_content_model: + return "The {{reader}} has a closed content model and is missing a " + "property or item present at path '{path}' in the {{writer}} " + "schema"; + case json_incompatibility_type:: + property_removed_not_covered_by_partially_open_content_model: + case json_incompatibility_type:: + item_removed_not_covered_by_partially_open_content_model: + return "A property or item is missing in the {{reader}} schema but " + "present at path '{path}' in the {{writer}} schema and is not " + "covered by its partially open content model"; + case json_incompatibility_type:: + property_added_not_covered_by_partially_open_content_model: + case json_incompatibility_type:: + item_added_not_covered_by_partially_open_content_model: + return "The {{reader}} schema has a property or item at path '{path}' " + "which is missing in the {{writer}} schema and is not covered " + "by its partially open content model"; + case json_incompatibility_type::reserved_property_removed: + return "The {{reader}} schema has reserved property '{path}' removed " + "from its metadata which is present in the {{writer}} schema."; + case json_incompatibility_type::reserved_property_conflicts_with_property: + return "The {{reader}} schema has property at path '{path}' that " + "conflicts with the reserved properties which is missing in the " + "{{writer}} schema."; + + case json_incompatibility_type::unknown: + return "{{reader}} schema is not compatible with {{writer}} schema: " + "check '{path}'"; + } + __builtin_unreachable(); +} + +std::ostream& operator<<(std::ostream& os, const json_incompatibility& v) { + fmt::print( + os, R"({{errorType:"{}", description:"{}"}})", v._type, v.describe()); + return os; +} + +ss::sstring json_incompatibility::describe() const { + return fmt::format( + fmt::runtime(description_for_type(_type)), + fmt::arg("path", _path.string())); +} + compatibility_result raw_compatibility_result::operator()(verbose is_verbose) && { compatibility_result result = {.is_compat = !has_error()}; diff --git a/src/v/pandaproxy/schema_registry/compatibility.h b/src/v/pandaproxy/schema_registry/compatibility.h index ef991d7495a02..12e3bcadb1856 100644 --- a/src/v/pandaproxy/schema_registry/compatibility.h +++ b/src/v/pandaproxy/schema_registry/compatibility.h @@ -136,6 +136,105 @@ class proto_incompatibility { Type _type; }; +enum class json_incompatibility_type { + type_narrowed = 0, + type_changed, + max_length_added, + max_length_decreased, + min_length_added, + min_length_increased, + pattern_added, + pattern_changed, + maximum_added, + maximum_decreased, + minimum_added, + minimum_increased, + exclusive_maximum_added, + exclusive_maximum_decreased, + exclusive_minimum_added, + exclusive_minimum_increased, + multiple_of_added, + multiple_of_expanded, + multiple_of_changed, + required_attribute_added, + max_properties_added, + max_properties_decreased, + min_properties_added, + min_properties_increased, + additional_properties_removed, + additional_properties_narrowed, + dependency_array_added, + dependency_array_extended, + dependency_array_changed, + dependency_schema_added, + property_added_to_open_content_model, + required_property_added_to_unopen_content_model, + property_removed_from_closed_content_model, + property_removed_not_covered_by_partially_open_content_model, + property_added_not_covered_by_partially_open_content_model, + reserved_property_removed, + reserved_property_conflicts_with_property, + max_items_added, + max_items_decreased, + min_items_added, + min_items_increased, + unique_items_added, + additional_items_removed, + additional_items_narrowed, + item_added_to_open_content_model, + item_removed_from_closed_content_model, + item_removed_not_covered_by_partially_open_content_model, + item_added_not_covered_by_partially_open_content_model, + enum_array_narrowed, + enum_array_changed, + combined_type_changed, + product_type_extended, + sum_type_extended, + sum_type_narrowed, + combined_type_subschemas_changed, + not_type_extended, + unknown, +}; + +/** + * json_incompatibility - A single incompatibility between JSON schemas. + * + * Encapsulates: + * - the path to the location of the incompatibility in the _writer_ schema + * - the type of incompatibility + * + * Primary interface is `describe`, which combines the contained info into + * a format string which can then be interpolated with identifying info for + * the reader and writer schemas in the request handler. + */ +class json_incompatibility { +public: + using Type = json_incompatibility_type; + json_incompatibility(std::filesystem::path path, Type type) + : _path(std::move(path)) + , _type(type) {} + + ss::sstring describe() const; + Type type() const { return _type; } + + friend std::ostream& + operator<<(std::ostream& os, const json_incompatibility& v); + + friend bool + operator==(const json_incompatibility&, const json_incompatibility&) + = default; + + // Helpful for unit testing + template + friend H AbslHashValue(H h, const json_incompatibility& e) { + return H::combine(std::move(h), e._path.string(), e._type); + } + +private: + std::filesystem::path _path; + Type _type; +}; + /** * raw_compatibility_result - A collection of unformatted proto or avro * incompatibilities. Its purpose is twofold: @@ -146,12 +245,23 @@ class proto_incompatibility { * incompatibilities into formatted error messages. */ class raw_compatibility_result { - using schema_incompatibility - = std::variant; + using schema_incompatibility = std::variant< + avro_incompatibility, + proto_incompatibility, + json_incompatibility>; public: raw_compatibility_result() = default; + template + requires std::constructible_from + && std::convertible_to + static auto of(Args&&... args) { + raw_compatibility_result res; + res.emplace(std::forward(args)...); + return res; + } + template requires std::constructible_from && std::convertible_to diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index cd6f637d0b773..ca191fa0084e2 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -18,6 +18,7 @@ #include "json/schema.h" #include "json/stringbuffer.h" #include "json/writer.h" +#include "pandaproxy/schema_registry/compatibility.h" #include "pandaproxy/schema_registry/error.h" #include "pandaproxy/schema_registry/errors.h" #include "pandaproxy/schema_registry/sharded_store.h" @@ -50,6 +51,7 @@ #include #include +#include #include #include @@ -57,6 +59,8 @@ namespace pandaproxy::schema_registry { namespace { +using json_compatibility_result = raw_compatibility_result; + // this is the list of supported dialects enum class json_schema_dialect { draft4, @@ -108,6 +112,10 @@ struct document_context { json_schema_dialect dialect; }; +// Passed into is_superset_* methods where the path and the generated verbose +// incompatibilities don't matter, only whether they are compatible or not +const static std::filesystem::path ignored_path = ""; + } // namespace struct json_schema_definition::impl { @@ -365,8 +373,11 @@ result parse_json(iobuf buf) { // a schema O is a superset of another schema N if every schema that is valid // for N is also valid for O. precondition: older and newer are both valid // schemas -bool is_superset( - context const& ctx, json::Value const& older, json::Value const& newer); +json_compatibility_result is_superset( + context const& ctx, + json::Value const& older, + json::Value const& newer, + std::filesystem::path p); // close the implementation in a namespace to keep it contained namespace is_superset_impl { @@ -614,11 +625,13 @@ extract_property_and_gate_check( // value | value | is_same or predicate template requires std::is_invocable_r_v -bool is_numeric_property_value_superset( +json_compatibility_result is_numeric_property_value_superset( json::Value const& older, json::Value const& newer, std::string_view prop_name, VPred&& value_predicate, + json_incompatibility changed_err, + json_incompatibility added_err, std::optional default_value = std::nullopt) { // get value or default_value auto get_value = [&](json::Value const& v) -> std::optional { @@ -626,7 +639,7 @@ bool is_numeric_property_value_superset( json::Value{prop_name.data(), rapidjson::SizeType(prop_name.size())}); if (it == v.MemberEnd()) { - return default_value; + return std::nullopt; } // Gate on values that can't be represented with doubles. @@ -649,29 +662,35 @@ bool is_numeric_property_value_superset( auto older_value = get_value(older); auto newer_value = get_value(newer); - if (older_value == newer_value) { - // either both not set or with the same value - return true; - } if (older_value.has_value() && newer_value.has_value()) { - return std::invoke( - std::forward(value_predicate), *older_value, *newer_value); + if (!std::invoke( + std::forward(value_predicate), + *older_value, + *newer_value)) { + return json_compatibility_result::of( + changed_err); + } + } else if (older_value.has_value()) { + if (!default_value.has_value() || *older_value != *default_value) { + // Non-default value was removed + return json_compatibility_result::of( + added_err); + } } - // (relevant only if default_value is not used) - // only one is set. if older is not set then newer has a value that is more - // restrictive, so older is a superset of newer - return !older_value.has_value(); + // Value only in newer or neither + return json_compatibility_result{}; } enum class additional_field_for { object, array }; -bool is_additional_superset( +json_compatibility_result is_additional_superset( context const& ctx, json::Value const& older, json::Value const& newer, - additional_field_for field_type) { + additional_field_for field_type, + std::filesystem::path p) { // "additional___" can be either true (if omitted it's true), false // or a schema. The check is performed with this table. // older ap | newer ap | compatible @@ -701,7 +720,22 @@ bool is_additional_superset( } } }; - + auto [additional_path, narrowed_errt, removed_errt] = [&] { + switch (field_type) { + case additional_field_for::object: + return std::make_tuple( + p / "additionalProperties", + json_incompatibility_type::additional_properties_narrowed, + json_incompatibility_type::additional_properties_removed); + case additional_field_for::array: + // Even for draft 202012 "additionalItems" is used in the path not + // "items" + return std::make_tuple( + p / "additionalItems", + json_incompatibility_type::additional_items_narrowed, + json_incompatibility_type::additional_items_removed); + } + }(); // helper to parse additional__ auto get_additional_props = [&](json_schema_dialect d, json::Value const& v) -> std::variant { @@ -719,64 +753,97 @@ bool is_additional_superset( // additionalProperties are boolean return std::visit( ss::make_visitor( - [](bool older, bool newer) { - if (older == newer) { - // same value is compatible - return true; + [&additional_path, removed_errt](bool older, bool newer) { + if (older || !newer) { + return json_compatibility_result{}; } - // older=true -> newer=false - compatible // older=false -> newer=true - not compatible - return older; + return json_compatibility_result::of( + std::move(additional_path), removed_errt); }, - [&ctx](bool older, json::Value const* newer) { + [&ctx, &additional_path, removed_errt]( + bool older, json::Value const* newer) { if (older) { // true is compatible with any schema - return true; + return json_compatibility_result{}; } // likely false, but need to check - return is_superset(ctx, get_false_schema(), *newer); + if (is_superset(ctx, get_false_schema(), *newer, ignored_path) + .has_error()) { + return json_compatibility_result::of( + std::move(additional_path), removed_errt); + } + return json_compatibility_result{}; }, - [&ctx](json::Value const* older, bool newer) { + [&ctx, &additional_path, narrowed_errt]( + json::Value const* older, bool newer) { if (!newer) { // any schema is compatible with false - return true; + return json_compatibility_result{}; } // convert newer to {} and check against that - return is_superset(ctx, *older, get_true_schema()); + if (is_superset(ctx, *older, get_true_schema(), ignored_path) + .has_error()) { + return json_compatibility_result::of( + std::move(additional_path), narrowed_errt); + } + return json_compatibility_result{}; }, - [&ctx](json::Value const* older, json::Value const* newer) { + [&ctx, + &additional_path](json::Value const* older, json::Value const* newer) { // check subschemas for compatibility - return is_superset(ctx, *older, *newer); + return is_superset(ctx, *older, *newer, std::move(additional_path)); }), get_additional_props(ctx.older.dialect(), older), get_additional_props(ctx.newer.dialect(), newer)); } -bool is_string_superset(json::Value const& older, json::Value const& newer) { +json_compatibility_result is_string_superset( + json::Value const& older, json::Value const& newer, std::filesystem::path p) { + json_compatibility_result res; + // note: "format" is not part of the checks - if (!is_numeric_property_value_superset( - older, newer, "minLength", std::less_equal<>{}, 0)) { - // older is less strict - return false; - } - if (!is_numeric_property_value_superset( - older, newer, "maxLength", std::greater_equal<>{})) { - // older is less strict - return false; - } + + res.merge(is_numeric_property_value_superset( + older, + newer, + "minLength", + std::less_equal<>{}, + {p / "minLength", json_incompatibility_type::min_length_increased}, + {p / "minLength", json_incompatibility_type::min_length_added}, + 0)); + + res.merge(is_numeric_property_value_superset( + older, + newer, + "maxLength", + std::greater_equal<>{}, + {p / "maxLength", json_incompatibility_type::max_length_decreased}, + {p / "maxLength", json_incompatibility_type::max_length_added})); auto [maybe_gate_value, older_val_p, newer_val_p] = extract_property_and_gate_check(older, newer, "pattern"); if (maybe_gate_value.has_value()) { - return maybe_gate_value.value(); + if (!maybe_gate_value.value()) { + res.emplace( + p / "pattern", json_incompatibility_type::pattern_added); + } + return res; } // both have "pattern". check if they are the same, the only // possible_value_accepted - return as_string_view(*older_val_p) == as_string_view(*newer_val_p); + if (as_string_view(*older_val_p) != as_string_view(*newer_val_p)) { + res.emplace( + p / "pattern", json_incompatibility_type::pattern_changed); + } + return res; } -bool is_numeric_superset(json::Value const& older, json::Value const& newer) { +json_compatibility_result is_numeric_superset( + json::Value const& older, json::Value const& newer, std::filesystem::path p) { + json_compatibility_result res; + // preconditions: // newer["type"]=="number" implies older["type"]=="number" // older["type"]=="integer" implies newer["type"]=="integer" @@ -790,41 +857,56 @@ bool is_numeric_superset(json::Value const& older, json::Value const& newer) { // "exclusiveMinimum" is always the exclusive range. in this check we // require for them to be the same datatype - if (!is_numeric_property_value_superset( - older, newer, "minimum", std::less_equal<>{})) { - // older["minimum"] is not superset of newer["minimum"] because newer is - // less strict - return false; - } - if (!is_numeric_property_value_superset( - older, newer, "maximum", std::greater_equal<>{})) { - // older["maximum"] is not superset of newer["maximum"] because newer - // is less strict - return false; - } - - if (!is_numeric_property_value_superset( - older, newer, "multipleOf", [](double older, double newer) { - // check that the reminder of newer/older is close enough to 0. - // close enough is defined as being close to the Unit in the Last - // Place of the bigger between the two. - // TODO: this is an approximate check, if a bigdecimal - // representation it would be possible to perform an exact - // reminder(newer, older)==0 check - constexpr auto max_ulp_error = 3; - return std::abs(std::remainder(newer, older)) - <= (max_ulp_error * boost::math::ulp(newer)); - })) { - return false; - } + // older["minimum"] is not superset of newer["minimum"] because newer is + // less strict + res.merge(is_numeric_property_value_superset( + older, + newer, + "minimum", + std::less_equal<>{}, + {p / "minimum", json_incompatibility_type::minimum_increased}, + {p / "minimum", json_incompatibility_type::minimum_added})); + + // older["maximum"] is not superset of newer["maximum"] because newer is + // less strict + res.merge(is_numeric_property_value_superset( + older, + newer, + "maximum", + std::greater_equal<>{}, + {p / "maximum", json_incompatibility_type::maximum_decreased}, + {p / "maximum", json_incompatibility_type::maximum_added})); + + // TODO: return multiple_of_expanded instead of multiple_of_changed if older + // is a multiple of newer + res.merge(is_numeric_property_value_superset( + older, + newer, + "multipleOf", + [](double older, double newer) { + // check that the reminder of newer/older is close enough to 0. + // close enough is defined as being close to the Unit in the Last + // Place of the bigger between the two. + // TODO: this is an approximate check, if a bigdecimal + // representation it would be possible to perform an exact + // reminder(newer, older)==0 check + constexpr auto max_ulp_error = 3; + return std::abs(std::remainder(newer, older)) + <= (max_ulp_error * boost::math::ulp(newer)); + }, + {p / "multipleOf", json_incompatibility_type::multiple_of_changed}, + {p / "multipleOf", json_incompatibility_type::multiple_of_added})); // exclusiveMinimum/exclusiveMaximum checks are mostly the same logic, // implemented in this helper - auto exclusive_limit_check = []( - json::Value const& older, - json::Value const& newer, - std::string_view prop_name, - std::invocable auto pred) { + auto exclusive_limit_check = + []( + json::Value const& older, + json::Value const& newer, + std::string_view prop_name, + std::invocable auto pred, + json_compatibility_result changed_err, + json_compatibility_result added_err) -> json_compatibility_result { auto get_value = [=](json::Value const& v) -> std::variant { auto it = v.FindMember(json::Value{ @@ -847,27 +929,36 @@ bool is_numeric_superset(json::Value const& older, json::Value const& newer) { return std::visit( ss::make_visitor( - [](bool older, bool newer) { + [&](bool older, bool newer) { // compatible if no change or if older was not "exclusive" - return older == newer || older == false; + if (older != newer && older != false) { + return changed_err; + } + return json_compatibility_result{}; }, - [](bool older, std::monostate) { + [&](bool older, std::monostate) { // monostate defaults to false, compatible if older is false - return older == false; + if (older != false) { + return added_err; + } + return json_compatibility_result{}; }, [&](double older, double newer) { // delegate to pred - return std::invoke(pred, older, newer); + if (!std::invoke(pred, older, newer)) { + return changed_err; + } + return json_compatibility_result{}; }, - [](double, std::monostate) { + [&](double, std::monostate) { // newer is less strict than older - return false; + return added_err; }, [](std::monostate, auto) { // older has no rules, compatible with everything - return true; + return json_compatibility_result{}; }, - [&](auto, auto) -> bool { + [&](auto, auto) -> json_compatibility_result { throw as_exception(invalid_schema(fmt::format( R"(is_numeric_superset-{} not implemented for mixed types: older: '{}', newer: '{}')", prop_name, @@ -878,21 +969,42 @@ bool is_numeric_superset(json::Value const& older, json::Value const& newer) { get_value(newer)); }; - if (!exclusive_limit_check( - older, newer, "exclusiveMinimum", std::less_equal<>{})) { - return false; - } - - if (!exclusive_limit_check( - older, newer, "exclusiveMaximum", std::greater_equal<>{})) { - return false; - } - - return true; + auto p_exlusive_minimum = p / "exclusiveMinimum"; + res.merge(exclusive_limit_check( + older, + newer, + "exclusiveMinimum", + std::less_equal<>{}, + json_compatibility_result::of( + p_exlusive_minimum, + json_incompatibility_type::exclusive_minimum_increased), + json_compatibility_result::of( + p_exlusive_minimum, + json_incompatibility_type::exclusive_minimum_added))); + + auto p_exlusive_maximum = p / "exclusiveMaximum"; + res.merge(exclusive_limit_check( + older, + newer, + "exclusiveMaximum", + std::greater_equal<>{}, + json_compatibility_result::of( + p_exlusive_maximum, + json_incompatibility_type::exclusive_maximum_decreased), + json_compatibility_result::of( + p_exlusive_maximum, + json_incompatibility_type::exclusive_maximum_added))); + + return res; } -bool is_array_superset( - context const& ctx, json::Value const& older, json::Value const& newer) { +json_compatibility_result is_array_superset( + context const& ctx, + json::Value const& older, + json::Value const& newer, + std::filesystem::path p) { + json_compatibility_result res; + // "type": "array" is used to model an array or a tuple. // for array, "items" is a schema that validates all the elements. // for tuple in Draft4, "items" is an array of schemas to validate the @@ -904,15 +1016,22 @@ bool is_array_superset( // then is split based on array/tuple. // size checks are common to both types - if (!is_numeric_property_value_superset( - older, newer, "minItems", std::less_equal<>{}, 0)) { - return false; - } - - if (!is_numeric_property_value_superset( - older, newer, "maxItems", std::greater_equal<>{})) { - return false; - } + res.merge(is_numeric_property_value_superset( + older, + newer, + "minItems", + std::less_equal<>{}, + {p / "minItems", json_incompatibility_type::min_items_increased}, + {p / "minItems", json_incompatibility_type::min_items_added}, + 0)); + + res.merge(is_numeric_property_value_superset( + older, + newer, + "maxItems", + std::greater_equal<>{}, + {p / "maxItems", json_incompatibility_type::max_items_decreased}, + {p / "maxItems", json_incompatibility_type::max_items_added})); // uniqueItems makes sense mostly for arrays, but it's also allowed for // tuples, so the validation is done here @@ -936,7 +1055,8 @@ bool is_array_superset( if (older_value == true && newer_value == false) { // removed unique items requirement - return false; + res.emplace( + p / "uniqueItems", json_incompatibility_type::unique_items_added); } // in draft 2020, "prefixItems" is used to represent tuples instead of an @@ -980,7 +1100,9 @@ bool is_array_superset( if (older_is_tuple != newer_is_tuple) { // one is a tuple and the other is not. not compatible - return false; + res.emplace( + p / "items", json_incompatibility_type::unknown); + return res; } // both are tuples or both are arrays @@ -989,10 +1111,12 @@ bool is_array_superset( // note that "additionalItems" can be defined, but it's // not used by validation because every element is validated against // "items" - return is_superset( + res.merge(is_superset( ctx, get_object_or_empty(ctx.older, older, "items"), - get_object_or_empty(ctx.newer, newer, "items")); + get_object_or_empty(ctx.newer, newer, "items"), + p / "items")); + return res; } // both are tuple schemas, validation is similar to object. one side @@ -1000,62 +1124,73 @@ bool is_array_superset( // first check is for "additionalItems" compatibility, it's cheaper than the // rest - if (!is_additional_superset( - ctx, older, newer, additional_field_for::array)) { - return false; + + res.merge(is_additional_superset( + ctx, older, newer, additional_field_for::array, p)); + if (res.has_error()) { + return res; } auto older_tuple_schema = older[get_tuple_items_kw(ctx.older.dialect())].GetArray(); auto newer_tuple_schema = newer[get_tuple_items_kw(ctx.newer.dialect())].GetArray(); - // find the first pair of schemas that do not match - auto [older_it, newer_it] = std::ranges::mismatch( - older_tuple_schema, - newer_tuple_schema, - [&ctx](auto const& older, auto const& newer) { - return is_superset(ctx, older, newer); - }); - - if ( - older_it != older_tuple_schema.end() - && newer_it != newer_tuple_schema.end()) { - // if both iterators are not end iterators, they are pointing to a - // pair of elements where is_superset(*older_it, *newer_it)==false, - // not compatible - return false; + auto older_it = older_tuple_schema.begin(); + auto newer_it = newer_tuple_schema.begin(); + int index = 0; + for (; older_it != older_tuple_schema.end() + && newer_it != newer_tuple_schema.end(); + ++older_it, ++newer_it, ++index) { + res.merge(is_superset( + ctx, *older_it, *newer_it, p / "items" / std::to_string(index))); + if (res.has_error()) { + return res; + } } // no mismatching elements, and they don't have the same size. // To be compatible, excess elements needs to be compatible with the other // "additionalItems" schema - auto older_additional_schema = get_object_or_empty( ctx.older, older, get_additional_items_kw(ctx.older.dialect())); - if (!std::all_of( - newer_it, newer_tuple_schema.end(), [&](json::Value const& n) { - return is_superset(ctx, older_additional_schema, n); - })) { - // newer has excess elements that are not compatible with - // older["additionalItems"] - return false; - } - auto newer_additional_schema = get_object_or_empty( ctx.newer, newer, get_additional_items_kw(ctx.newer.dialect())); - if (!std::all_of( - older_it, older_tuple_schema.end(), [&](json::Value const& o) { - return is_superset(ctx, o, newer_additional_schema); - })) { - // older has excess elements that are not compatible with - // newer["additionalItems"] - return false; - } - return true; + + // newer_has_more: true if newer has excess elements, false if older has + // excess elements + auto newer_has_more = newer_it != newer_tuple_schema.end(); + auto excess_begin = newer_has_more ? newer_it : older_it; + auto excess_end = newer_has_more ? newer_tuple_schema.end() + : older_tuple_schema.end(); + auto errt = newer_has_more + ? json_incompatibility_type:: + item_removed_not_covered_by_partially_open_content_model + : json_incompatibility_type:: + item_added_not_covered_by_partially_open_content_model; + + std::for_each(excess_begin, excess_end, [&](json::Value const& e) { + auto item_p = p / "items" / std::to_string(index); + auto sup_res = newer_has_more + ? is_superset(ctx, older_additional_schema, e, item_p) + : is_superset(ctx, e, newer_additional_schema, item_p); + + if (sup_res.has_error()) { + res.merge(std::move(sup_res)); + res.emplace(std::move(item_p), errt); + } + + ++index; + }); + + return res; } -bool is_object_properties_superset( - context const& ctx, json::Value const& older, json::Value const& newer) { +json_compatibility_result is_object_properties_superset( + context const& ctx, + json::Value const& older, + json::Value const& newer, + std::filesystem::path p) { + json_compatibility_result res; // check that every property in newer["properties"] // if it appears in older["properties"], // then it has to be compatible with the schema @@ -1067,7 +1202,7 @@ bool is_object_properties_superset( auto newer_properties = get_object_or_empty(ctx.newer, newer, "properties"); if (newer_properties.ObjectEmpty()) { // no "properties" in newer, all good - return true; + return res; } // older["properties"] is a map of @@ -1080,14 +1215,15 @@ bool is_object_properties_superset( ctx.older, older, "additionalProperties"); // scan every prop in newer["properties"] for (auto const& [prop, schema] : newer_properties) { + auto prop_path = [&p, &prop] { + return p / "properties" / prop.GetString(); + }; + // it is either an evolution of a schema in older["properties"] if (auto older_it = older_properties.FindMember(prop); older_it != older_properties.MemberEnd()) { // prop exists in both - if (!is_superset(ctx, older_it->value, schema)) { - // not compatible - return false; - } + res.merge(is_superset(ctx, older_it->value, schema, prop_path())); // check next property continue; } @@ -1102,28 +1238,60 @@ bool is_object_properties_superset( auto regex = re2::RE2(as_string_view(propPattern)); if (re2::RE2::PartialMatch(pname, regex)) { pattern_match_found = true; - if (!is_superset(ctx, schemaPattern, schema)) { - // not compatible - return false; + + auto prop_res = is_superset( + ctx, schemaPattern, schema, prop_path()); + + if (prop_res.has_error()) { + res.merge(std::move(prop_res)); + res.emplace( + prop_path(), + json_incompatibility_type:: + property_removed_not_covered_by_partially_open_content_model); + break; } } } + if (pattern_match_found) { + // check next property + continue; + } // or it should check against older["additionalProperties"], if no match // in patternProperties was found - if ( - !pattern_match_found - && !is_superset(ctx, older_additional_properties, schema)) { - // not compatible - return false; + if (is_false_schema(older_additional_properties)) { + res.emplace( + prop_path(), + json_incompatibility_type:: + property_removed_from_closed_content_model); + continue; + } + + auto add_prop_res = is_superset( + ctx, older_additional_properties, schema, prop_path()); + + if (add_prop_res.has_error()) { + res.merge(std::move(add_prop_res)); + res.emplace( + prop_path(), + json_incompatibility_type:: + property_removed_not_covered_by_partially_open_content_model); + continue; } + + // Additional properties matched. We can move on to the next property + // without any errors added to res. } - return true; + return res; } -bool is_object_required_superset( - context const& ctx, json::Value const& older, json::Value const& newer) { +json_compatibility_result is_object_required_superset( + context const& ctx, + json::Value const& older, + json::Value const& newer, + std::filesystem::path p) { + json_compatibility_result res; // to pass the check, a required property from newer has to be present in // older, or if new it needs to have a default value. // note that: @@ -1149,89 +1317,150 @@ bool is_object_required_superset( // yes | yes | yes // yes | no | if it has "default" in older // no | yes | yes - return std::ranges::all_of( + std::ranges::for_each( older_req_in_both_properties, [&](json::Value const& o) { - return std::ranges::find(newer_req, o) != newer_req.End() - || older_props[o].HasMember("default"); + if ( + std::ranges::find(newer_req, o) == newer_req.End() + && !older_props[o].HasMember("default")) { + res.emplace( + p / "required" / as_string_view(o), + json_incompatibility_type::required_attribute_added); + } }); + return res; } -bool is_object_dependencies_superset( - context const& ctx, json::Value const& older, json::Value const& newer) { +json_compatibility_result is_object_dependencies_superset( + context const& ctx, + json::Value const& older, + json::Value const& newer, + std::filesystem::path p) { + json_compatibility_result res; // "dependencies", if present, is a dict of . To be compatible, each key in older has to be in newer and the // values have to be of the same type and compatible. // older string array needs to be a subset of newer string array. // older schema needs to be compatible with newer schema - auto [maybe_skip, older_p, newer_p] = extract_property_and_gate_check( - older, newer, "dependencies"); - if (maybe_skip.has_value()) { - return maybe_skip.value(); - } - // older and newer have "dependencies" + auto older_p = get_object_or_empty(ctx.older, older, "dependencies"); + auto newer_p = get_object_or_empty(ctx.newer, newer, "dependencies"); // all dependencies in older need to carry over in newer, and need to be // compatible // TODO: n^2 search - return std::ranges::all_of( - older_p->GetObject(), - [&ctx, - newer_dep = newer_p->GetObject()](json::Value::Member const& older_dep) { - auto n_it = newer_dep.FindMember(older_dep.name); - if (n_it == newer_dep.MemberEnd()) { - // dependency definition removed, not compatible - return false; - } - // check that the dependency values for this name are compatible - auto const& o = older_dep.value; - auto const& n = n_it->value; - - if (o.IsArray() && n.IsArray()) { - // string array: n needs to be a a superset of o - // TODO: n^2 search - return std::ranges::all_of( - o.GetArray(), [n_array = n.GetArray()](json::Value const& p) { - return std::ranges::find(n_array, p) != n_array.End(); - }); - } + std::ranges::for_each(older_p, [&](json::Value::Member const& older_dep) { + auto path_dep = p / "dependencies" / as_string_view(older_dep.name); + auto const& o = older_dep.value; + auto n_it = newer_p.FindMember(older_dep.name); + + if (o.IsObject()) { + if (n_it == newer_p.MemberEnd()) { + res.emplace( + std::move(path_dep), + json_incompatibility_type::dependency_schema_added); + return; + } - if (o.IsObject() && n.IsObject()) { - // schemas: o and n needs to be compatible - return is_superset(ctx, o, n); - } + auto const& n = n_it->value; - return false; - }); + if (!n.IsObject()) { + res.emplace( + std::move(path_dep), + json_incompatibility_type::dependency_schema_added); + return; + } + + // schemas: o and n needs to be compatible + res.merge(is_superset(ctx, o, n, std::move(path_dep))); + } else if (o.IsArray()) { + if (n_it == newer_p.MemberEnd()) { + res.emplace( + std::move(path_dep), + json_incompatibility_type::dependency_array_added); + return; + } + + auto const& n = n_it->value; + + if (!n.IsArray()) { + res.emplace( + std::move(path_dep), + json_incompatibility_type::dependency_array_added); + return; + } + // string array: n needs to be a a superset of o + // TODO: n^2 search + bool n_superset_of_o = std::ranges::all_of( + o.GetArray(), [n_array = n.GetArray()](json::Value const& p) { + return std::ranges::find(n_array, p) != n_array.End(); + }); + if (!n_superset_of_o) { + bool o_superset_of_n = std::ranges::all_of( + n.GetArray(), [o_array = o.GetArray()](json::Value const& p) { + return std::ranges::find(o_array, p) != o_array.End(); + }); + if (o_superset_of_n) { + res.emplace( + std::move(path_dep), + json_incompatibility_type::dependency_array_extended); + } else { + res.emplace( + std::move(path_dep), + json_incompatibility_type::dependency_array_changed); + } + } + return; + } else { + throw as_exception(invalid_schema(fmt::format( + "dependencies can only be an array or an object for valid " + "schemas but it was: {}", + pj{o}))); + } + }); + + return res; } -bool is_object_superset( - context const& ctx, json::Value const& older, json::Value const& newer) { - if (!is_numeric_property_value_superset( - older, newer, "minProperties", std::less_equal<>{}, 0)) { - // newer requires less properties to be set - return false; - } - if (!is_numeric_property_value_superset( - older, newer, "maxProperties", std::greater_equal<>{})) { - // newer requires more properties to be set - return false; - } - if (!is_additional_superset( - ctx, older, newer, additional_field_for::object)) { - // additional properties are not compatible - return false; - } - if (!is_object_properties_superset(ctx, older, newer)) { - // "properties" in newer might not be compatible with - // older["properties"] (incompatible evolution) or - // older["patternProperties"] (it is not compatible with the pattern - // that matches the new name) or older["additionalProperties"] (older - // has partial open model that does not allow some new properties in - // newer) - return false; - } +json_compatibility_result is_object_superset( + context const& ctx, + json::Value const& older, + json::Value const& newer, + std::filesystem::path p) { + json_compatibility_result res; + + // newer requires less properties to be set + res.merge(is_numeric_property_value_superset( + older, + newer, + "minProperties", + std::less_equal<>{}, + {p / "minProperties", + json_incompatibility_type::min_properties_increased}, + {p / "minProperties", json_incompatibility_type::min_properties_added}, + 0)); + + // newer requires more properties to be set + res.merge(is_numeric_property_value_superset( + older, + newer, + "maxProperties", + std::greater_equal<>{}, + {p / "maxProperties", + json_incompatibility_type::max_properties_decreased}, + {p / "maxProperties", json_incompatibility_type::max_properties_added})); + + // Check if additional properties are compatible + res.merge(is_additional_superset( + ctx, older, newer, additional_field_for::object, p)); + + // "properties" in newer might not be compatible with + // older["properties"] (incompatible evolution) or + // older["patternProperties"] (it is not compatible with the pattern + // that matches the new name) or older["additionalProperties"] (older + // has partial open model that does not allow some new properties in + // newer) + res.merge(is_object_properties_superset(ctx, older, newer, p)); // note: to match the behavior of the legacy software, // ``` @@ -1240,19 +1469,20 @@ bool is_object_superset( // ``` // is omitted - if (!is_object_required_superset(ctx, older, newer)) { - // required properties are not compatible - return false; - } - if (!is_object_dependencies_superset(ctx, older, newer)) { - // dependencies are not compatible - return false; - } + // Check if required properties are compatible + res.merge(is_object_required_superset(ctx, older, newer, p)); - return true; + // Check if dependencies are compatible + res.merge(is_object_dependencies_superset(ctx, older, newer, std::move(p))); + + return res; } -bool is_enum_superset(json::Value const& older, json::Value const& newer) { +json_compatibility_result is_enum_superset( + json::Value const& older, json::Value const& newer, std::filesystem::path p) { + json_compatibility_result res; + auto enum_p = p / "enum"; + auto older_it = older.FindMember("enum"); auto newer_it = newer.FindMember("enum"); auto older_is_enum = older_it != older.MemberEnd(); @@ -1260,12 +1490,14 @@ bool is_enum_superset(json::Value const& older, json::Value const& newer) { if (!older_is_enum && !newer_is_enum) { // both are not an "enum" schema, compatible - return true; + return res; } if (!(older_is_enum && newer_is_enum)) { // only one is an "enum" schema, not compatible - return false; + res.emplace( + std::move(enum_p), json_incompatibility_type::enum_array_changed); + return res; } // both "enum" @@ -1276,7 +1508,9 @@ bool is_enum_superset(json::Value const& older, json::Value const& newer) { if (newer_set.Size() > older_set.Size()) { // quick check: // newer has some value not in older - return false; + res.emplace( + std::move(enum_p), json_incompatibility_type::enum_array_changed); + return res; } // TODO: current implementation is O(n^2), but could be O(n) with normalized @@ -1286,15 +1520,22 @@ bool is_enum_superset(json::Value const& older, json::Value const& newer) { // also be improved with normalization of the input if (older_set.end() == std::ranges::find(older_set, v)) { // newer has an element not in older - return false; + res.emplace( + std::move(enum_p), json_incompatibility_type::enum_array_changed); + return res; } } - return true; + return res; } -bool is_not_combinator_superset( - context const& ctx, json::Value const& older, json::Value const& newer) { +json_compatibility_result is_not_combinator_superset( + context const& ctx, + json::Value const& older, + json::Value const& newer, + std::filesystem::path p) { + json_compatibility_result res; + auto older_it = older.FindMember("not"); auto newer_it = newer.FindMember("not"); auto older_has_not = older_it != older.MemberEnd(); @@ -1302,19 +1543,29 @@ bool is_not_combinator_superset( if (older_has_not != newer_has_not) { // only one has a "not" schema, not compatible - return false; + res.emplace( + p, json_incompatibility_type::type_changed); + return res; } if (older_has_not && newer_has_not) { // for not combinator, we want to check if the "not" newer subschema is // less strict than the older subschema, because this means that newer // validated less data than older - return is_superset( - {ctx.newer, ctx.older}, newer_it->value, older_it->value); + auto is_not_superset = is_superset( + {ctx.newer, ctx.older}, + newer_it->value, + older_it->value, + ignored_path); + + if (is_not_superset.has_error()) { + res.emplace( + p / "not", json_incompatibility_type::not_type_extended); + } } // both do not have a "not" key, compatible - return true; + return res; } enum class p_combinator { oneOf, allOf, anyOf }; @@ -1329,8 +1580,13 @@ json::Value to_keyword(p_combinator c) { } } -bool is_positive_combinator_superset( - context const& ctx, json::Value const& older, json::Value const& newer) { +json_compatibility_result is_positive_combinator_superset( + context const& ctx, + json::Value const& older, + json::Value const& newer, + std::filesystem::path p) { + json_compatibility_result res; + auto get_combinator = [](json::Value const& v) { auto res = std::optional{}; for (auto c : @@ -1354,13 +1610,15 @@ bool is_positive_combinator_superset( auto maybe_newer_comb = get_combinator(newer); if (!maybe_older_comb.has_value()) { // older has not a combinator, maximum freedom for newer. compatible - return true; + return res; } // older has a combinator if (!maybe_newer_comb.has_value()) { // older has a combinator but newer does not. not compatible - return false; + res.emplace( + std::move(p), json_incompatibility_type::combined_type_changed); + return res; } // newer has a combinator @@ -1378,8 +1636,17 @@ bool is_positive_combinator_superset( if (older_schemas.Size() == 1 && newer_schemas.Size() == 1) { // both combinators have only one subschema, so the actual // combinator does not matter. compare subschemas directly - return is_superset( - ctx, *older_schemas.Begin(), *newer_schemas.Begin()); + auto is_combinator_superset = is_superset( + ctx, + *older_schemas.Begin(), + *newer_schemas.Begin(), + ignored_path); + if (is_combinator_superset.has_error()) { + res.emplace( + std::move(p), + json_incompatibility_type::combined_type_subschemas_changed); + } + return res; } // either older or newer - or both - has more than one subschema @@ -1387,24 +1654,42 @@ bool is_positive_combinator_superset( if (older_schemas.Size() == 1 && newer_comb == p_combinator::allOf) { // older has only one subschema, newer is "allOf" so it can be // compatible if any one of the subschemas matches older - return std::ranges::any_of( + auto any_superset = std::ranges::any_of( newer_schemas, [&](json::Value const& s) { - return is_superset(ctx, *older_schemas.Begin(), s); + return !is_superset( + ctx, *older_schemas.Begin(), s, ignored_path) + .has_error(); }); + if (!any_superset) { + res.emplace( + std::move(p), + json_incompatibility_type::combined_type_subschemas_changed); + } + return res; } if (older_comb == p_combinator::oneOf && newer_schemas.Size() == 1) { // older has multiple schemas but only one can be valid. it's // compatible if the only subschema in newer is compatible with one // in older - return std::ranges::any_of( + auto any_superset = std::ranges::any_of( older_schemas, [&](json::Value const& s) { - return is_superset(ctx, s, *newer_schemas.Begin()); + return !is_superset( + ctx, s, *newer_schemas.Begin(), ignored_path) + .has_error(); }); + if (!any_superset) { + res.emplace( + std::move(p), + json_incompatibility_type::combined_type_subschemas_changed); + } + return res; } // different combinators, not a special case. not compatible - return false; + res.emplace( + std::move(p), json_incompatibility_type::combined_type_changed); + return res; } // same combinator for older and newer, or older is "anyOf" @@ -1415,14 +1700,18 @@ bool is_positive_combinator_superset( if (older_schemas.Size() > newer_schemas.Size()) { if (older_comb == p_combinator::allOf) { // older has more restrictions than newer, not compatible - return false; + res.emplace( + std::move(p), json_incompatibility_type::product_type_extended); + return res; } } else if (older_schemas.Size() < newer_schemas.Size()) { if ( newer_comb == p_combinator::anyOf || newer_comb == p_combinator::oneOf) { // newer has more degrees of freedom than older, not compatible - return false; + res.emplace( + std::move(p), json_incompatibility_type::sum_type_narrowed); + return res; } } @@ -1444,7 +1733,9 @@ bool is_positive_combinator_superset( auto superset_graph = graph_t{older_schemas.Size() + newer_schemas.Size()}; for (auto o = 0u; o < older_schemas.Size(); ++o) { for (auto n = 0u; n < newer_schemas.Size(); ++n) { - if (is_superset(ctx, older_schemas[o], newer_schemas[n])) { + if (!is_superset( + ctx, older_schemas[o], newer_schemas[n], ignored_path) + .has_error()) { // translate n for the graph auto n_index = n + older_schemas.Size(); add_edge(o, n_index, superset_graph); @@ -1464,10 +1755,13 @@ bool is_positive_combinator_superset( // one of sub schema was left out, meaning that it either had no valid // is_superset() relation with the other schema array, or that the // algorithm couldn't find a unique compatible pattern. - return false; + res.emplace( + std::move(p), + json_incompatibility_type::combined_type_subschemas_changed); + return res; } - return true; + return res; } } // namespace is_superset_impl @@ -1477,15 +1771,18 @@ using namespace is_superset_impl; // a schema O is a superset of another schema N if every schema that is valid // for N is also valid for O. precondition: older and newer are both valid // schemas -bool is_superset( +json_compatibility_result is_superset( context const& ctx, json::Value const& older_schema, - json::Value const& newer_schema) { + json::Value const& newer_schema, + std::filesystem::path p) { + json_compatibility_result res; + // break recursion if parameters are atoms: if (is_true_schema(older_schema) || is_false_schema(newer_schema)) { // either older is the superset of every possible schema, or newer is // the subset of every possible schema - return true; + return res; } auto older = get_schema(ctx.older, older_schema); @@ -1508,7 +1805,24 @@ bool is_superset( // newer_types_not_in_older accepts integer, and we can accept an // evolution from number -> integer. everything else is makes `newer` // less strict than older - return false; + + auto older_minus_newer = json_type_list{}; + std::ranges::set_difference( + older_types, newer_types, std::back_inserter(older_minus_newer)); + + if ( + older_minus_newer.empty() + || (older_minus_newer == json_type_list{json_type::integer} && newer_minus_older == json_type_list{json_type::number})) { + // type_narrowed is reported when older has fewer elements or the + // same but with integer in older instead of number in newer + res.emplace( + std::move(p), json_incompatibility_type::type_narrowed); + } else { + // Otherwise report the more general type_changed + res.emplace( + std::move(p), json_incompatibility_type::type_changed); + } + return res; } // newer accepts less (or equal) types. for each type, try to find a less @@ -1518,26 +1832,18 @@ bool is_superset( // to do a breadth first search to find a counterexample switch (t) { case json_type::string: - if (!is_string_superset(older, newer)) { - return false; - } + res.merge(is_string_superset(older, newer, p)); break; case json_type::integer: [[fallthrough]]; case json_type::number: - if (!is_numeric_superset(older, newer)) { - return false; - } + res.merge(is_numeric_superset(older, newer, p)); break; case json_type::object: - if (!is_object_superset(ctx, older, newer)) { - return false; - } + res.merge(is_object_superset(ctx, older, newer, p)); break; case json_type::array: - if (!is_array_superset(ctx, older, newer)) { - return false; - } + res.merge(is_array_superset(ctx, older, newer, p)); break; case json_type::boolean: // no check needed for boolean; @@ -1548,17 +1854,9 @@ bool is_superset( } } - if (!is_enum_superset(older, newer)) { - return false; - } - - if (!is_not_combinator_superset(ctx, older, newer)) { - return false; - } - - if (!is_positive_combinator_superset(ctx, older, newer)) { - return false; - } + res.merge(is_enum_superset(older, newer, p)); + res.merge(is_not_combinator_superset(ctx, older, newer, p)); + res.merge(is_positive_combinator_superset(ctx, older, newer, p)); for (auto not_yet_handled_keyword : { // draft 6 unhandled keywords: @@ -1580,7 +1878,7 @@ bool is_superset( } // no rule in newer is less strict than older, older is superset of newer - return true; + return res; } void sort(json::Value& val) { @@ -1650,16 +1948,15 @@ ss::future make_canonical_json_schema( compatibility_result check_compatible( const json_schema_definition& reader, const json_schema_definition& writer, - verbose is_verbose [[maybe_unused]]) { - auto is_compatible = [&]() { + verbose is_verbose) { + auto raw_compat_result = [&]() { // reader is a superset of writer iff every schema that is valid for // writer is also valid for reader context ctx{.older{reader()}, .newer{writer()}}; - return is_superset(ctx, reader().ctx.doc, writer().ctx.doc); + return is_superset(ctx, reader().ctx.doc, writer().ctx.doc, "#/"); }(); - // TODO(gellert.nagy): start using the is_verbose flag in a follow up PR - return compatibility_result{.is_compat = is_compatible}; + return std::move(raw_compat_result)(is_verbose); } } // namespace pandaproxy::schema_registry diff --git a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc index 1e58d56d7eb6d..58d894a1c85ba 100644 --- a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc +++ b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc @@ -7,27 +7,48 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0 +#include "pandaproxy/schema_registry/compatibility.h" #include "pandaproxy/schema_registry/error.h" #include "pandaproxy/schema_registry/errors.h" #include "pandaproxy/schema_registry/exceptions.h" #include "pandaproxy/schema_registry/json.h" #include "pandaproxy/schema_registry/sharded_store.h" +#include "pandaproxy/schema_registry/test/compatibility_common.h" #include "pandaproxy/schema_registry/types.h" #include #include #include +#include #include #include namespace pp = pandaproxy; namespace pps = pp::schema_registry; +using incompat_t = pps::json_incompatibility_type; +using incompatibility = pps::json_incompatibility; bool check_compatible( const pps::json_schema_definition& reader_schema, const pps::json_schema_definition& writer_schema) { - return pps::check_compatible(reader_schema, writer_schema).is_compat; + auto res = pps::check_compatible( + reader_schema, writer_schema, pps::verbose::yes); + return res.is_compat; +} + +pps::compatibility_result check_compatible_verbose( + const pps::canonical_schema_definition& r, + const pps::canonical_schema_definition& w) { + pps::sharded_store s; + return check_compatible( + pps::make_json_schema_definition( + s, {pps::subject("r"), {r.shared_raw(), pps::schema_type::json}}) + .get(), + pps::make_json_schema_definition( + s, {pps::subject("w"), {w.shared_raw(), pps::schema_type::json}}) + .get(), + pps::verbose::yes); } struct store_fixture { @@ -306,141 +327,155 @@ SEASTAR_THREAD_TEST_CASE(test_json_schema_references) { struct compatibility_test_case { std::string_view reader_schema; std::string_view writer_schema; - bool reader_is_compatible_with_writer; + std::vector compat_result; bool expected_exception = false; }; -static constexpr auto compatibility_test_cases = std::to_array< - compatibility_test_case>({ +static const auto compatibility_test_cases = std::to_array({ //***** not compatible section ***** // atoms { .reader_schema = "false", .writer_schema = "true", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::type_changed}}, }, // not allowed promotion { .reader_schema = R"({"type": "integer"})", .writer_schema = R"({"type": "number"})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::type_narrowed}}, }, { .reader_schema = R"({"type": ["integer", "string"]})", .writer_schema = R"({"type": ["number", "string"]})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::type_narrowed}}, }, // not allowed new types { .reader_schema = R"({"type": "integer"})", .writer_schema = R"({})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::type_narrowed}}, }, { .reader_schema = R"({"type": "boolean"})", .writer_schema = R"({"type": ["boolean", "null"]})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::type_narrowed}}, }, // not allowed numeric evolutions { .reader_schema = R"({"type": "number", "minimum": 11.2})", .writer_schema = R"({"type": "number", "maximum": 30})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/minimum", incompat_t::minimum_added}}, }, { .reader_schema = R"({"type": "number", "minimum": 1.1, "maximum": 3.199})", .writer_schema = R"({"type": "integer", "minimum": 1.1, "maximum": 3.2})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/maximum", incompat_t::maximum_decreased}}, }, { .reader_schema = R"({"type": "number", "multipleOf": 10})", .writer_schema = R"({"type": "number", "multipleOf": 21})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/multipleOf", incompat_t::multiple_of_changed}}, }, - { - .reader_schema = R"({"type": "number", "multipleOf": 20})", - .writer_schema = R"({"type": "number", "multipleOf": 10})", - .reader_is_compatible_with_writer = false, + {.reader_schema = R"({"type": "number", "multipleOf": 20})", + .writer_schema = R"({"type": "number", "multipleOf": 10})", + .compat_result = { + // TODO: should be multiple_of_expanded + {"#/multipleOf", incompat_t::multiple_of_changed}}, }, { .reader_schema = R"({"type": "number", "multipleOf": 10.1})", .writer_schema = R"({"type": "number", "multipleOf": 20.2001})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/multipleOf", incompat_t::multiple_of_changed}}, + }, + { + .reader_schema = R"({"type": "number", "multipleOf": 20})", + .writer_schema = R"({"type": "number"})", + .compat_result = {{"#/multipleOf", incompat_t::multiple_of_added}}, }, { .reader_schema = R"({"type": "number", "maximum": 10, "exclusiveMaximum": true})", .writer_schema = R"({"type": "number", "maximum": 10})", - .reader_is_compatible_with_writer = false, + // Note: this is incorrectly accepted as compatible by the reference impl + .compat_result = {{"#/exclusiveMaximum", incompat_t::exclusive_maximum_added}}, }, { .reader_schema = R"({"type": "number", "exclusiveMaximum": 10})", .writer_schema = R"({"type": "number"})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/exclusiveMaximum", incompat_t::exclusive_maximum_added}}, }, { .reader_schema = R"({"type": "number", "maximum": 10, "exclusiveMaximum": true})", .writer_schema = R"({"type": "number", "maximum": 10, "exclusiveMaximum": 10})", - .reader_is_compatible_with_writer = false, .expected_exception = true, }, { .reader_schema = R"({"type": "number", "exclusiveMinimum": 10})", .writer_schema = R"({"type": "number", "exclusiveMinimum": 9})", - .reader_is_compatible_with_writer = false, + .compat_result = {{ "#/exclusiveMinimum", incompat_t::exclusive_minimum_increased}}, }, { .reader_schema = R"({"type": "number", "exclusiveMaximum": 9})", .writer_schema = R"({"type": "number", "exclusiveMaximum": 10})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/exclusiveMaximum", incompat_t::exclusive_maximum_decreased}}, }, // string checks { .reader_schema = R"({"type": "string", "minLength": 2})", .writer_schema = R"({"type": "string", "minLength": 1})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/minLength", incompat_t::min_length_increased}}, }, // string + pattern check: reader regex is a superset of writer regex, but // the rules specify to reject this { .reader_schema = R"({"type": "string", "pattern": "^test +"})", .writer_schema = R"({"type": "string", "pattern": "^test +"})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/pattern", incompat_t::pattern_changed}}, + }, + // string + pattern check: pattern removed + { + .reader_schema = R"({"type": "string", "pattern": "^test +"})", + .writer_schema = R"({"type": "string"})", + .compat_result = {{"#/pattern", incompat_t::pattern_added}}, }, // enum checks { .reader_schema = R"({"type": "integer", "enum": [1, 2, 4]})", .writer_schema = R"({"type": "integer", "enum": [4, 1, 3]})", - .reader_is_compatible_with_writer = false, + // Note: this is reported as combined_type_subschemas_changed by the reference impl + .compat_result = {{"#/enum",incompat_t::enum_array_changed}}, }, // objects checks: size increase is not allowed { - .reader_schema - = R"({"type": "object", "minProperties": 2, "maxProperties": 10})", + .reader_schema = R"({"type": "object", "minProperties": 2, "maxProperties": 10})", .writer_schema = R"({"type": "object", "maxProperties": 11})", - .reader_is_compatible_with_writer = false, + .compat_result = { + {"#/minProperties", incompat_t::min_properties_added}, + {"#/maxProperties", incompat_t::max_properties_decreased}, + }, }, // objects checks: additional properties not compatible { .reader_schema = R"({"type": "object", "additionalProperties": false})", .writer_schema = R"({"type": "object", "additionalProperties": {"type": "string"}})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/additionalProperties", incompat_t::additional_properties_removed}}, }, { .reader_schema = R"({"type": "object", "additionalProperties": {"type": "string"}})", .writer_schema = R"({"type": "object" })", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/additionalProperties", incompat_t::additional_properties_narrowed}}, }, // object checks: new properties not compatible for a closed reader { .reader_schema = R"({"type": "object", "additionalProperties": false})", .writer_schema = R"({"type": "object", "properties": {"a": {"type": "null"}}, "additionalProperties": false})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/properties/a", incompat_t::property_removed_from_closed_content_model}}, }, // object checks: existing properties need to be compatible { @@ -448,7 +483,7 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"type": "object", "properties": {"aaaa": {"type": "integer"}}})", .writer_schema = R"({"type": "object", "properties": {"aaaa": {"type": "string"}}})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/properties/aaaa", incompat_t::type_changed}}, }, // object checks: new properties need to be compatible { @@ -456,7 +491,21 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"type": "object", "patternProperties": {"^a": {"type": "integer"}}})", .writer_schema = R"({"type": "object", "properties": {"aaaa": {"type": "string"}}})", - .reader_is_compatible_with_writer = false, + .compat_result = { + {"#/properties/aaaa", incompat_t::type_changed}, + {"#/properties/aaaa", incompat_t::property_removed_not_covered_by_partially_open_content_model} + }, + }, + // object checks: new properties need to be compatible with all old patternProperties + { + .reader_schema + = R"({"type": "object", "patternProperties": {"^a": {"type": "integer"}, "^b": {"type": "string"}, "^b": {"type": "integer"}}})", + .writer_schema + = R"({"type": "object", "properties": {"bbbb": {"type": "string"}}})", + .compat_result = { + {"#/properties/bbbb", incompat_t::type_changed}, + {"#/properties/bbbb", incompat_t::property_removed_not_covered_by_partially_open_content_model} + }, }, // object checks: dependencies removed { @@ -466,7 +515,10 @@ static constexpr auto compatibility_test_cases = std::to_array< "dependencies": {"a": ["b"], "b": ["a"]} })", .writer_schema = R"({"type": "object"})", - .reader_is_compatible_with_writer = false, + .compat_result = { + {"#/dependencies/a", incompat_t::dependency_array_added}, + {"#/dependencies/b", incompat_t::dependency_array_added}, + }, }, // object checks: dependencies missing members { @@ -480,7 +532,7 @@ static constexpr auto compatibility_test_cases = std::to_array< "type": "object", "dependencies": {"a": ["b"]} })", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/dependencies/b", incompat_t::dependency_array_added}}, }, // object checks: dependencies missing value in string array { @@ -494,7 +546,7 @@ static constexpr auto compatibility_test_cases = std::to_array< "type": "object", "dependencies": {"a": ["b"]} })", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/dependencies/a", incompat_t::dependency_array_extended}}, }, // object checks: dependencies incompatible schemas { @@ -508,31 +560,90 @@ static constexpr auto compatibility_test_cases = std::to_array< "type": "object", "dependencies": {"a": {"type": "number"}} })", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/dependencies/a", incompat_t::type_narrowed}}, + }, + // object checks: dependency "a" changed from array to object + { + .reader_schema = R"( +{ + "type": "object", + "dependencies": {"a": ["b"]} +})", + .writer_schema = R"( +{ + "type": "object", + "dependencies": {"a": {"type": "number"}} +})", + .compat_result = {{"#/dependencies/a", incompat_t::dependency_array_added}}, + }, + // object checks: dependency "a" changed from object to array + { + .reader_schema = R"( +{ + "type": "object", + "dependencies": {"a": {"type": "number"}} +})", + .writer_schema = R"( +{ + "type": "object", + "dependencies": {"a": ["b"]} +})", + .compat_result = {{"#/dependencies/a", incompat_t::dependency_schema_added}}, + }, + // object checks: dependency "a" changed from array to object + { + .reader_schema = R"( +{ + "type": "object", + "dependencies": {"a": ["b"]} +})", + .writer_schema = R"( +{ + "type": "object", + "dependencies": {"a": {"type": "number"}} +})", + .compat_result = {{"#/dependencies/a", incompat_t::dependency_array_added}}, + }, + // object checks: dependency "a" changed from object to array + { + .reader_schema = R"( +{ + "type": "object", + "dependencies": {"a": {"type": "number"}} +})", + .writer_schema = R"( +{ + "type": "object", + "dependencies": {"a": ["b"]} +})", + .compat_result = {{"#/dependencies/a", incompat_t::dependency_schema_added}}, }, // array checks: size increase is not allowed { .reader_schema = R"({"type": "array", "minItems": 2, "maxItems": 10})", .writer_schema = R"({"type": "array", "maxItems": 11})", - .reader_is_compatible_with_writer = false, + .compat_result = { + {"#/minItems", incompat_t::min_items_added}, + {"#/maxItems", incompat_t::max_items_decreased}, + }, }, // array checks: uniqueItems must be compatible { .reader_schema = R"({"type": "array", "uniqueItems": true})", .writer_schema = R"({"type": "array"})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/uniqueItems", incompat_t::unique_items_added}}, }, // array checks: "items" = schema must be superset { .reader_schema = R"({"type": "array", "items": {"type": "boolean"}})", .writer_schema = R"({"type": "array", "items": {"type": "integer"}})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/items", incompat_t::type_changed}}, }, // array checks: "items": array schema should be compatible { .reader_schema = R"({"type": "array", "items": [{"type":"boolean"}]})", .writer_schema = R"({"type": "array", "items": [{"type":"integer"}]})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/items/0", incompat_t::type_changed}}, }, // array checks: "items": array schema additionalItems should be compatible { @@ -540,7 +651,7 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"type": "array", "additionalItems": {"type": "boolean"}, "items": [{"type":"boolean"}]})", .writer_schema = R"({"type": "array", "additionalItems": {"type": "integer"}, "items": [{"type":"boolean"}]})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/additionalItems", incompat_t::type_changed}}, }, // array checks: "items": array schema extra elements should be compatible { @@ -548,21 +659,27 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"type": "array", "additionalItems": {"type": "integer"}, "items": [{"type":"boolean"}]})", .writer_schema = R"({"type": "array", "additionalItems": false, "items": [{"type":"boolean"}, {"type": "number"}]})", - .reader_is_compatible_with_writer = false, + .compat_result = { + {"#/items/1", incompat_t::type_narrowed}, + {"#/items/1", incompat_t::item_removed_not_covered_by_partially_open_content_model}, + }, }, { .reader_schema = R"({"type": "array", "additionalItems": {"type": "number"}, "items": [{"type":"boolean"}, {"type": "integer"}]})", .writer_schema = R"({"type": "array", "additionalItems": {"type": "number"}, "items": [{"type":"boolean"}]})", - .reader_is_compatible_with_writer = false, + .compat_result = { + {"#/items/1", incompat_t::type_narrowed}, + {"#/items/1", incompat_t::item_added_not_covered_by_partially_open_content_model}, + }, }, // combinators: "not" is required on both schema { .reader_schema = R"({"not": {"type": ["string", "integer", "number", "object", "array", "null"]}})", .writer_schema = R"({"type": "boolean"})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::type_changed}}, }, // combinators: "not" subschema has to get less strict { @@ -570,36 +687,37 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"type": ["integer", "number"], "not": {"type": "number"}})", .writer_schema = R"({"type": ["integer", "number"], "not": {"type": "integer"}})", - .reader_is_compatible_with_writer = false, + // Note: this is reported as combined_type_changed at '#/' by the reference impl + .compat_result = {{"#/not", incompat_t::not_type_extended}}, }, // positive combinators: multiple combs { .reader_schema = R"({"type": "integer", "oneOf": [true], "anyOf": [true]})", .writer_schema = R"({"type": "integer"})", - .reader_is_compatible_with_writer = false, .expected_exception = true, }, // positive combinators: mismatch of type { .reader_schema = R"({"type": "integer", "anyOf": [true]})", .writer_schema = R"({"type": "integer"})", - .reader_is_compatible_with_writer = false, + // Note: this is reported as type_changed by the reference impl + .compat_result = {{"#/", incompat_t::combined_type_changed}}, }, // positive combinators: mismatch of size { .reader_schema = R"({"allOf": [true, false]})", .writer_schema = R"({"allOf": [true]})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::product_type_extended}}, }, { .reader_schema = R"({"anyOf": [true]})", .writer_schema = R"({"anyOf": [true, false]})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::sum_type_narrowed}}, }, { .reader_schema = R"({"oneOf": [true]})", .writer_schema = R"({"oneOf": [true, false]})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::sum_type_narrowed}}, }, // positive combinators: subschema mismatch { @@ -608,120 +726,165 @@ static constexpr auto compatibility_test_cases = std::to_array< .reader_schema = R"({"oneOf": [{"type":"number"}, {"type": "boolean"}]})", .writer_schema = R"({"oneOf": [{"type":"number", "multipleOf": 10}, {"type": "number", "multipleOf": 1.1}]})", - .reader_is_compatible_with_writer = false, + // Note: this is reported as combined_type_changed by the reference impl + .compat_result = {{"#/", incompat_t::combined_type_subschemas_changed}}, }, + // object checks: removing a required property not ok if didn't have a default value + { + .reader_schema = R"( +{ + "type": "object", + "properties": { + "a": {"type": "integer"} + }, + "required": ["a"] +})", + .writer_schema = R"( +{ + "type": "object", + "properties": { + "a": {"type": "integer"} + } +})", + .compat_result = {{"#/required/a", incompat_t::required_attribute_added}}, + }, + // Both type-specific and combined checks fail + { + .reader_schema = R"( +{ + "type": ["number", "object"], + "minimum": 20, + "minProperties": 10, + "oneOf": [ + { "multipleOf": 10 }, + { "multipleOf": 3 } + ] +})", + .writer_schema = R"( +{ + "type": ["number", "object"], + "minimum": 10, + "oneOf": [ + { "multipleOf": 5 }, + { "multipleOf": 3 } + ] +})", + // Note: the reference implementation only reports combined_type_subschemas_changed here + .compat_result = {{"#/minimum", incompat_t::minimum_increased}, {"#/minProperties", incompat_t::min_properties_added}, {"#/", incompat_t::combined_type_subschemas_changed}}, + }, + //***** compatible section ***** // atoms { .reader_schema = "true", .writer_schema = "false", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // same type { .reader_schema = R"({"type": "boolean"})", .writer_schema = R"({"type": "boolean"})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "null"})", .writer_schema = R"({"type": "null"})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // restrict types { .reader_schema = R"({"type": ["null", "boolean"]})", .writer_schema = R"({"type": "null"})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": ["number", "boolean"], "minimum": 10.2})", .writer_schema = R"({"type": ["integer", "boolean"], "minimum": 11})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // numeric checks { .reader_schema = R"({"type": "number"})", .writer_schema = R"({"type": "number", "minimum": 11, "exclusiveMinimum": true})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number"})", .writer_schema = R"({"type": "number", "exclusiveMaximum": 10})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number", "maximum": 10, "exclusiveMaximum": false})", .writer_schema = R"({"type": "number", "maximum": 10})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number", "exclusiveMinimum": 10})", .writer_schema = R"({"type": "number", "exclusiveMinimum": 10})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number", "exclusiveMinimum": 10})", .writer_schema = R"({"type": "number", "exclusiveMinimum": 11})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number", "exclusiveMaximum": 10})", .writer_schema = R"({"type": "number", "exclusiveMaximum": 10})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number", "exclusiveMaximum": 10})", .writer_schema = R"({"type": "number", "exclusiveMaximum": 8})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number"})", .writer_schema = R"({"type": "number", "maximum": 11})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number", "minimum": 1.1, "maximum": 4})", .writer_schema = R"({"type": "number", "minimum": 1.1, "maximum": 3.2})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number", "multipleOf": 10})", .writer_schema = R"({"type": "number", "multipleOf": 20})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number", "multipleOf": 10.1})", .writer_schema = R"({"type": "number", "multipleOf": 20.2})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "number", "multipleOf": 1.1})", .writer_schema = R"({"type": "number", "multipleOf": 3.3})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // string checks { .reader_schema = R"({"type": "string", "minLength": 0})", .writer_schema = R"({"type": "string"})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "string"})", .writer_schema = R"({"type": "string", "minLength": 1, "maxLength": 10})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "string", "minLength": 1})", .writer_schema = R"({"type": "string", "minLength": 2})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"type": "string", "pattern": "^test"})", .writer_schema = R"({"type": "string", "pattern": "^test"})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // metadata is ignored { @@ -729,13 +892,13 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"title": "myschema", "description": "this is my schema", "default": true, "type": "boolean"})", .writer_schema = R"({"title": "MySchema", "description": "this schema is mine", "default": false, "type": "boolean"})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // enum checks { .reader_schema = R"({"type": "integer", "enum": [1, 2, 4]})", .writer_schema = R"({"type": "integer", "enum": [4, 1]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"( @@ -748,7 +911,7 @@ static constexpr auto compatibility_test_cases = std::to_array< "type": "object", "additionalProperties": false })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // objects checks: // - size decrease @@ -790,7 +953,7 @@ static constexpr auto compatibility_test_cases = std::to_array< "required": ["aaaa", "cccc"], "dependencies": {"a": ["b", "c", "d"], "b": {"type": "integer"}, "d": ["a"]} })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // object checks: a new required property is ok if it has a default value { @@ -816,19 +979,19 @@ static constexpr auto compatibility_test_cases = std::to_array< "a" ] })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // array checks: same { .reader_schema = R"({"type": "array"})", .writer_schema = R"({"type": "array"})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // array checks: uniqueItems explicit value to false { .reader_schema = R"({"type": "array", "uniqueItems": false})", .writer_schema = R"({"type": "array"})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // array checks: // - size decrease @@ -862,7 +1025,7 @@ static constexpr auto compatibility_test_cases = std::to_array< ], "additionalItems": {"type": "integer"} })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // array checks: excess elements are absorbed by additionalItems { @@ -891,7 +1054,7 @@ static constexpr auto compatibility_test_cases = std::to_array< ], "additionalItems": false })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"( @@ -919,7 +1082,7 @@ static constexpr auto compatibility_test_cases = std::to_array< "type": "integer" } })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"( @@ -945,7 +1108,7 @@ static constexpr auto compatibility_test_cases = std::to_array< ], "additionalItems": false })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // array checks: prefixItems/items are compatible across drafts { @@ -970,7 +1133,7 @@ static constexpr auto compatibility_test_cases = std::to_array< ], "additionalItems": false })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({ @@ -994,7 +1157,7 @@ static constexpr auto compatibility_test_cases = std::to_array< "items": false })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // combinators: "not" { @@ -1002,7 +1165,7 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"type": "integer", "not": {"type": "integer", "minimum": 10}})", .writer_schema = R"({"type": "integer", "not": {"type": "integer", "minimum": 5}})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // positive combinators { @@ -1010,7 +1173,7 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"oneOf": [{"type":"number", "multipleOf": 3}, {"type": "boolean"}]})", .writer_schema = R"({"oneOf": [{"type":"boolean"}, {"type": "number", "multipleOf": 9}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // positive combinators special case: only writer has a combinator { @@ -1022,61 +1185,61 @@ static constexpr auto compatibility_test_cases = std::to_array< {"type": "object", "properties": {"d": {"type": "array"}}} ] })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // positive combinators single schema cases: the actual combinator does not // matter { .reader_schema = R"({"anyOf": [{"type": "number"}]})", .writer_schema = R"({"oneOf": [{"type": "integer"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"allOf": [{"type": "number"}]})", .writer_schema = R"({"oneOf": [{"type": "integer"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"oneOf": [{"type": "number"}]})", .writer_schema = R"({"anyOf": [{"type": "integer"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"allOf": [{"type": "number"}]})", .writer_schema = R"({"anyOf": [{"type": "integer"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"anyOf": [{"type": "number"}]})", .writer_schema = R"({"allOf": [{"type": "integer"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"oneOf": [{"type": "number"}]})", .writer_schema = R"({"allOf": [{"type": "integer"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // positive combinators older is single schema, newer is allOf { .reader_schema = R"({"oneOf": [{"type": "number"}]})", .writer_schema = R"({"allOf": [{"type": "integer"}, {"type": "string"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"anyOf": [{"type": "number"}]})", .writer_schema = R"({"allOf": [{"type": "integer"}, {"type": "string"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // positive combinators older is oneOf, newer is single schema { .reader_schema = R"({"oneOf": [{"type": "number"}, {"type": "string"}]})", .writer_schema = R"({"allOf": [{"type": "integer"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"oneOf": [{"type": "number"}, {"type": "string"}]})", .writer_schema = R"({"anyOf": [{"type": "integer"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // positive combinators special cases: anyOf in reader can be compared with // the other @@ -1087,21 +1250,21 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"anyOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", .writer_schema = R"({"anyOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { // smoke test smaller reader is not compatible .reader_schema = R"({"anyOf": [{"type": "number"}, {"type": "string"}]})", .writer_schema = R"({"anyOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::sum_type_narrowed}}, }, { // smoke test bigger reader is compatible .reader_schema = R"({"anyOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", .writer_schema = R"({"anyOf": [{"type": "number"}, {"type": "string"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { // oneOf in writer can only be smaller or equal @@ -1109,21 +1272,21 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"anyOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", .writer_schema = R"({"oneOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { // oneOf in writer can only be smaller or equal .reader_schema = R"({"anyOf": [{"type": "number"}, {"type": "string"}]})", .writer_schema = R"({"oneOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", - .reader_is_compatible_with_writer = false, + .compat_result = {{"#/", incompat_t::sum_type_narrowed}}, }, { // oneOf in writer can only be smaller or equal .reader_schema = R"({"anyOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", .writer_schema = R"({"oneOf": [{"type": "number"}, {"type": "string"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { // allOf in writer can be compatible with any cardinality @@ -1131,40 +1294,40 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"anyOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", .writer_schema = R"({"allOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { // allOf in writer can be compatible with any cardinality .reader_schema = R"({"anyOf": [{"type": "number"}, {"type": "string"}]})", .writer_schema = R"({"allOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { // allOf in writer can be compatible with any cardinality .reader_schema = R"({"anyOf": [{"type": "number"}, {"type": "string"}, {"type": "boolean"}]})", .writer_schema = R"({"allOf": [{"type": "number"}, {"type": "string"}]})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // dialects { .reader_schema = R"({"$schema": "http://json-schema.org/draft-06/schema"})", .writer_schema = R"({"$schema": "http://json-schema.org/draft-06/schema#"})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // different dialects { .reader_schema = R"({"$schema": "http://json-schema.org/draft-04/schema"})", .writer_schema = R"({"$schema": "http://json-schema.org/draft-06/schema#"})", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({"$schema": "http://json-schema.org/draft-04/schema"})", .writer_schema = "true", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // array checks: prefixItems/items are compatible across drafts, unspecified // schema @@ -1188,7 +1351,7 @@ static constexpr auto compatibility_test_cases = std::to_array< ], "additionalItems": false })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, { .reader_schema = R"({ @@ -1210,7 +1373,7 @@ static constexpr auto compatibility_test_cases = std::to_array< "items": false })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, // object: "patternProperties" is not involved in compat checks { @@ -1238,7 +1401,7 @@ static constexpr auto compatibility_test_cases = std::to_array< } } })", - .reader_is_compatible_with_writer = true, + .compat_result = {}, }, }); SEASTAR_THREAD_TEST_CASE(test_compatibility_check) { @@ -1257,7 +1420,7 @@ SEASTAR_THREAD_TEST_CASE(test_compatibility_check) { "reader: {}, writer: {}, is compatible: {}", data.reader_schema, data.writer_schema, - data.reader_is_compatible_with_writer)) { + data.compat_result.empty())) { try { // sanity check that each schema is compatible with itself BOOST_CHECK_MESSAGE( @@ -1276,11 +1439,20 @@ SEASTAR_THREAD_TEST_CASE(test_compatibility_check) { data.writer_schema)); // check compatibility (or not) reader->writer + auto expected = [&data] { + pps::raw_compatibility_result res{}; + for (auto inc : data.compat_result) { + res.emplace(inc); + } + return std::move(res)(pps::verbose::yes); + }; + BOOST_CHECK_EQUAL( - data.reader_is_compatible_with_writer, - ::check_compatible( + expected(), + pps::check_compatible( make_json_schema(data.reader_schema), - make_json_schema(data.writer_schema))); + make_json_schema(data.writer_schema), + pps::verbose::yes)); } catch (...) { BOOST_CHECK_MESSAGE( data.expected_exception, @@ -1292,3 +1464,96 @@ SEASTAR_THREAD_TEST_CASE(test_compatibility_check) { } }; } + +namespace { + +const auto schema_old = pps::canonical_schema_definition({ + R"( +{ + "type": "object", + "minProperties": 2, + "maxProperties": 9, + "properties": { + "aaaa": {"type": "integer"}, + "bbbb": {"type": "string"}, + "cccc": {"type": "boolean"} + }, + "patternProperties": { + "^b": {"type": "string", "minLength":10} + }, + "additionalProperties": false, + "required": ["aaaa", "cccc"], + "dependencies": {"a": ["b", "c", "d"], "b": {"type": "integer"}, "d": ["a"]} +})", + pps::schema_type::json}); + +const auto schema_new = pps::canonical_schema_definition({ + R"( +{ + "type": "object", + "minProperties": 3, + "maxProperties": 10, + "properties": { + "aaaa": {"type": "number"} + }, + "patternProperties": { + "^b": {"type": "string"} + }, + "additionalProperties": {"type": "boolean"}, + "required": ["aaaa"], + "dependencies": {"a":["c", "b"], "b": {"type": "number"}} +})", + pps::schema_type::json}); + +const absl::flat_hash_set forward_expected{ + {"#/properties/aaaa", incompat_t::type_narrowed}, + // TODO: implement the check for + // required_property_added_to_unopen_content_model. That is also expected + // for this schema update. + // {"#/properties/cccc", + // incompat_t::required_property_added_to_unopen_content_model}, + {"#/dependencies/a", incompat_t::dependency_array_extended}, + {"#/dependencies/d", incompat_t::dependency_array_added}, + {"#/dependencies/b", incompat_t::type_narrowed}, + {"#/additionalProperties", incompat_t::additional_properties_removed}, + {"#/maxProperties", incompat_t::max_properties_decreased}, +}; +const absl::flat_hash_set backward_expected{ + {"#/minProperties", incompat_t::min_properties_increased}, +}; + +const auto compat_data = std::to_array>({ + { + schema_old.share(), + schema_new.share(), + forward_expected, + }, + { + schema_new.share(), + schema_old.share(), + backward_expected, + }, +}); + +std::string format_set(const absl::flat_hash_set& d) { + return fmt::format("{}", fmt::join(d, "\n")); +} + +} // namespace + +SEASTAR_THREAD_TEST_CASE(test_json_compat_messages) { + for (const auto& cd : compat_data) { + auto compat = check_compatible_verbose(cd.reader, cd.writer); + + absl::flat_hash_set errs{ + compat.messages.begin(), compat.messages.end()}; + absl::flat_hash_set expected{ + cd.expected.messages.begin(), cd.expected.messages.end()}; + + BOOST_CHECK(!compat.is_compat); + BOOST_CHECK_EQUAL(errs.size(), expected.size()); + BOOST_REQUIRE_MESSAGE( + errs == expected, + fmt::format("{} != {}", format_set(errs), format_set(expected))); + } +} diff --git a/src/v/pandaproxy/schema_registry/types.cc b/src/v/pandaproxy/schema_registry/types.cc index 4187d6374efc9..ab262f84d3c30 100644 --- a/src/v/pandaproxy/schema_registry/types.cc +++ b/src/v/pandaproxy/schema_registry/types.cc @@ -88,4 +88,9 @@ std::ostream& operator<<(std::ostream& os, const canonical_schema& ref) { return os; } +std::ostream& operator<<(std::ostream& os, const compatibility_result& res) { + fmt::print(os, "is_compat: {}, messages: {}", res.is_compat, res.messages); + return os; +} + } // namespace pandaproxy::schema_registry diff --git a/src/v/pandaproxy/schema_registry/types.h b/src/v/pandaproxy/schema_registry/types.h index 5edf4e563d08f..d43b79a2a8f6e 100644 --- a/src/v/pandaproxy/schema_registry/types.h +++ b/src/v/pandaproxy/schema_registry/types.h @@ -534,6 +534,11 @@ from_string_view(std::string_view sv) { } struct compatibility_result { + friend bool + operator==(const compatibility_result&, const compatibility_result&) + = default; + friend std::ostream& operator<<(std::ostream&, const compatibility_result&); + bool is_compat; std::vector messages; }; diff --git a/tests/rptest/tests/schema_registry_test.py b/tests/rptest/tests/schema_registry_test.py index ec38422f7dc2c..394149ed6a438 100644 --- a/tests/rptest/tests/schema_registry_test.py +++ b/tests/rptest/tests/schema_registry_test.py @@ -199,6 +199,8 @@ def get_subject_name(sns: str, topic: str, field: MessageField, ] } """, + json=r'{"type": "number"}', + json_incompat=r'{"type": "number", "multipleOf": 20}', ) log_config = LoggingConfig('info', @@ -1570,6 +1572,7 @@ def test_post_compatibility_subject_version_transitive_order(self): @parametrize(schemas=("avro", "avro_incompat", "AVRO")) @parametrize(schemas=("proto3", "proto3_incompat", "PROTOBUF")) @parametrize(schemas=("proto2", "proto2_incompat", "PROTOBUF")) + @parametrize(schemas=("json", "json_incompat", "JSON")) def test_compatibility_messages(self, schemas): """ Verify compatibility messages