Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Commit

Permalink
Resolving PR comments.
Browse files: browse the repository at this point in the history
  • Loading branch information
Mark committed Mar 27, 2024
1 parent 5969c49 commit 96e062e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 15 deletions.
6 changes: 3 additions & 3 deletions src/cpr_data_access/parser_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
BackendDocument,
Json,
)
from cpr_data_access.utils import remove_if_all_nested_vals_none, unflatten_json
from cpr_data_access.utils import remove_key_if_all_nested_vals_none, unflatten_json
from langdetect import DetectorFactory, LangDetectException, detect
from pydantic import AnyHttpUrl, BaseModel, Field, model_validator

Expand Down Expand Up @@ -369,7 +369,7 @@ def from_flat_json(data: dict):
# We remove optional fields that have complex nested structures.
# E.g. if html_data had a value of None for has_valid_text, we need to remove
# it as this would throw a validation error.
unflattened = remove_if_all_nested_vals_none(unflattened, "html_data")
unflattened = remove_if_all_nested_vals_none(unflattened, "pdf_data")
unflattened = remove_key_if_all_nested_vals_none(unflattened, "html_data")
unflattened = remove_key_if_all_nested_vals_none(unflattened, "pdf_data")

return ParserOutput.model_validate(unflattened)
8 changes: 6 additions & 2 deletions src/cpr_data_access/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,12 @@ def unflatten_json(data: dict) -> dict:
return unflattened


def remove_if_all_nested_vals_none(data: dict, key: str) -> dict:
"""Remove an item if it's a dict with all values in the nested dictionary are None."""
def remove_key_if_all_nested_vals_none(data: dict, key: str) -> dict:
"""
Remove the value for a given key if it's a dict with all None values.
E.g. {"key": {"a": None, "b": None}} -> {}
"""
if key not in data:
return data
if isinstance(data[key], dict):
Expand Down
18 changes: 8 additions & 10 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import pytest

from cpr_data_access.utils import (
dig,
is_sensitive_query,
load_sensitive_query_terms,
remove_key_if_all_nested_vals_none,
unflatten_json,
remove_if_all_nested_vals_none,
)


TEST_SENSITIVE_QUERY_TERMS = (
"word",
"test term",
Expand Down Expand Up @@ -93,15 +91,15 @@ def test_unflatten_json() -> None:
assert unflatten_json(data) == expected


def test_remove_if_all_nested_vals_none() -> None:
"""Test remove_if_all_nested_vals_none function."""
assert remove_if_all_nested_vals_none({}, "key") == {}
assert remove_if_all_nested_vals_none({"key": None}, "key") == {"key": None}
assert remove_if_all_nested_vals_none({"key": {"nested": None}}, "key") == {}
assert remove_if_all_nested_vals_none({"key": {"nested": None}}, "no_key") == {
def test_remove_key_if_all_nested_vals_none() -> None:
"""Test remove_key_if_all_nested_vals_none function."""
assert remove_key_if_all_nested_vals_none({}, "key") == {}
assert remove_key_if_all_nested_vals_none({"key": None}, "key") == {"key": None}
assert remove_key_if_all_nested_vals_none({"key": {"nested": None}}, "key") == {}
assert remove_key_if_all_nested_vals_none({"key": {"nested": None}}, "no_key") == {
"key": {"nested": None}
}
assert remove_if_all_nested_vals_none(
assert remove_key_if_all_nested_vals_none(
{
"key": {"nested": None},
"key2": {"nested": "value"},
Expand Down

0 comments on commit 96e062e

Please sign in to comment.