Skip to content

Commit 653f723

Browse files
Deduplicate keys before populating PyDict
1 parent 3359241 commit 653f723

File tree

4 files changed

+87
-11
lines changed

4 files changed

+87
-11
lines changed

Cargo.lock

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ num-bigint = "0.4.6"
4545
num-traits = "0.2.19"
4646
uuid = "1.18.1"
4747
jiter = { version = "0.11.0", features = ["python"] }
48+
indexmap = "2.7.1"
4849
hex = "0.4.3"
4950
percent-encoding = "2.3.1"
5051

src/input/input_json.rs

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use std::borrow::Cow;
22

3+
use indexmap::IndexMap;
4+
35
use jiter::{JsonArray, JsonObject, JsonValue};
46
use num_traits::cast::ToPrimitive;
57
use pyo3::prelude::*;
@@ -59,18 +61,25 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> {
5961
}
6062

6163
fn as_kwargs(&self, py: Python<'py>) -> Option<Bound<'py, PyDict>> {
62-
match self {
63-
JsonValue::Object(object) => {
64-
let dict = PyDict::new(py);
65-
for (k, v) in object.as_slice() {
66-
// TODO: jiter doesn't deduplicate keys, so we should probably do that here to
67-
// avoid potential wasted work creating Python objects.
68-
dict.set_item(k, v).unwrap();
69-
}
70-
Some(dict)
71-
}
72-
_ => None,
64+
let JsonValue::Object(object) = self else {
65+
return None;
66+
};
67+
68+
// deduplicate keys before creating objects to avoid wasted work
69+
// jiter doesn't deduplicate keys, so duplicate keys in JSON will appear multiple times
70+
// in the slice. We use an IndexMap to keep only the last value for each key; a repeated key stays at its first-insertion position
71+
let mut unique_object = IndexMap::with_capacity(object.len());
72+
73+
for (k, v) in object.as_slice() {
74+
unique_object.insert(k, v);
7375
}
76+
77+
let dict = PyDict::new(py);
78+
for (k, v) in unique_object {
79+
dict.set_item(k, v).unwrap();
80+
}
81+
82+
Some(dict)
7483
}
7584

7685
type Arguments<'a>

tests/validators/test_dataclasses.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,3 +1845,46 @@ class MyDataclass:
18451845
assert dataclasses.asdict(
18461846
s.validate_python({'my_field': 1}, by_alias=runtime_by_alias, by_name=runtime_by_name)
18471847
) == {'my_field': 1}
1848+
1849+
1850+
def test_dataclass_json_duplicate_keys():
1851+
"""Test that duplicate keys in JSON are handled correctly (last value wins).
1852+
1853+
We want to ensure that:
1854+
1. The last value for a duplicate key is used (the common last-wins convention, matching Python's `json` module)
1855+
2. Overwritten values are discarded before Python objects are created (an optimisation; not asserted directly here)
1856+
"""
1857+
1858+
@dataclasses.dataclass
1859+
class MyDataclass:
1860+
name: str
1861+
age: int
1862+
1863+
schema = core_schema.dataclass_schema(
1864+
MyDataclass,
1865+
core_schema.dataclass_args_schema(
1866+
'MyDataclass',
1867+
[
1868+
core_schema.dataclass_field(name='name', schema=core_schema.str_schema()),
1869+
core_schema.dataclass_field(name='age', schema=core_schema.int_schema()),
1870+
],
1871+
),
1872+
['name', 'age'],
1873+
)
1874+
v = SchemaValidator(schema)
1875+
1876+
# json with duplicate keys - the last value should win
1877+
json_with_duplicates = '{"name": "Alice", "age": 30, "name": "Bob", "age": 25}'
1878+
result = v.validate_json(json_with_duplicates)
1879+
1880+
assert result.name == 'Bob', "Last value for 'name' should win"
1881+
assert result.age == 25, "Last value for 'age' should win"
1882+
assert dataclasses.asdict(result) == {'name': 'Bob', 'age': 25}
1883+
1884+
# test with multiple duplicates of the same key
1885+
json_multiple_duplicates = '{"name": "First", "age": 1, "name": "Second", "name": "Third", "age": 3}'
1886+
result2 = v.validate_json(json_multiple_duplicates)
1887+
1888+
assert result2.name == 'Third', 'Last value among multiple duplicates should win'
1889+
assert result2.age == 3
1890+
assert dataclasses.asdict(result2) == {'name': 'Third', 'age': 3}

0 commit comments

Comments
 (0)