Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 27 additions & 9 deletions src/input/input_json.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::borrow::Cow;
use std::collections::HashSet;

use jiter::{JsonArray, JsonObject, JsonValue};
use num_traits::cast::ToPrimitive;
Expand Down Expand Up @@ -59,18 +60,35 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> {
}

fn as_kwargs(&self, py: Python<'py>) -> Option<Bound<'py, PyDict>> {
match self {
JsonValue::Object(object) => {
let dict = PyDict::new(py);
for (k, v) in object.as_slice() {
// TODO: jiter doesn't deduplicate keys, so we should probably do that here to
// avoid potential wasted work creating Python objects.
dict.set_item(k, v).unwrap();
let JsonValue::Object(object) = self else {
return None;
};

// deduplicate keys before creating objects to avoid wasted work
// jiter doesn't deduplicate keys, so duplicate keys in JSON will appear multiple times
// in the slice. We iterate backwards to keep only the last value for each key while preserving order
let unique_indices_reversed = {
let mut seen = HashSet::with_capacity(object.len());
let mut unique = Vec::with_capacity(object.len());

for (i, (k, _)) in object.as_slice().iter().enumerate().rev() {
if seen.insert(k) {
unique.push(i);
}
Some(dict)
}
_ => None,

unique
};

let object = object.as_slice();

let dict = PyDict::new(py);
for &i in unique_indices_reversed.iter().rev() {
let (k, v) = &object[i];
dict.set_item(k, v).unwrap();
}

Some(dict)
}

type Arguments<'a>
Expand Down
43 changes: 43 additions & 0 deletions tests/validators/test_dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -1845,3 +1845,46 @@ class MyDataclass:
assert dataclasses.asdict(
s.validate_python({'my_field': 1}, by_alias=runtime_by_alias, by_name=runtime_by_name)
) == {'my_field': 1}


def test_dataclass_json_duplicate_keys():
    """Duplicate keys in JSON input must resolve to the last occurrence.

    This pins the standard JSON behavior (last value wins) and guards the
    optimization that avoids building Python objects for values that a later
    duplicate key overwrites.
    """

    @dataclasses.dataclass
    class MyDataclass:
        name: str
        age: int

    fields = [
        core_schema.dataclass_field(name='name', schema=core_schema.str_schema()),
        core_schema.dataclass_field(name='age', schema=core_schema.int_schema()),
    ]
    args_schema = core_schema.dataclass_args_schema('MyDataclass', fields)
    v = SchemaValidator(core_schema.dataclass_schema(MyDataclass, args_schema, ['name', 'age']))

    # each key appears twice; only the final occurrence should survive
    result = v.validate_json('{"name": "Alice", "age": 30, "name": "Bob", "age": 25}')
    assert result.name == 'Bob', "Last value for 'name' should win"
    assert result.age == 25, "Last value for 'age' should win"
    assert dataclasses.asdict(result) == {'name': 'Bob', 'age': 25}

    # one key repeated more than twice behaves the same way
    result2 = v.validate_json('{"name": "First", "age": 1, "name": "Second", "name": "Third", "age": 3}')
    assert result2.name == 'Third', 'Last value among multiple duplicates should win'
    assert result2.age == 3
    assert dataclasses.asdict(result2) == {'name': 'Third', 'age': 3}
Loading