Skip to content

Commit ed4a921

Browse files
authored
Feature/pdct 1790 Ensure all family metadata values are string arrays (#75)
* Ensure all metadata values are string arrays
* Bump to 3.8.27
* Don't validate as string[] for all meta values
1 parent 500b4f8 commit ed4a921

File tree

3 files changed

+49
-7
lines changed

3 files changed

+49
-7
lines changed

db_client/functions/metadata.py

+26-6
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,23 @@ def validate_metadata(
5858
}
5959
else:
6060
taxonomy = get_entity_specific_taxonomy(taxonomy, entity_key)
61-
return validate_metadata_against_taxonomy(taxonomy, metadata)
61+
62+
return validate_metadata_against_taxonomy(
63+
taxonomy, metadata, bool(entity_key is None)
64+
)
6265

6366

6467
def validate_metadata_against_taxonomy(
65-
taxonomy: Union[TaxonomyData, TaxonomyDataEntry], metadata: TaxonomyDataEntry
68+
taxonomy: Union[TaxonomyData, TaxonomyDataEntry],
69+
metadata: TaxonomyDataEntry,
70+
is_family_metadata: bool = False,
6671
) -> Optional[MetadataValidationErrors]:
6772
"""Build the Corpus taxonomy for the entity & validate against it.
6873
6974
:param TaxonomyDataEntry taxonomy: The Corpus taxonomy to validate against.
7075
:param TaxonomyDataEntry metadata: The metadata to validate.
76+
:param bool is_family_metadata: Whether to validate all metadata
77+
values as string arrays.
7178
:raises TypeError: If the Taxonomy is invalid.
7279
:return Optional[MetadataValidationResult]: A list of errors or None
7380
if the metadata is valid.
@@ -79,18 +86,22 @@ def validate_metadata_against_taxonomy(
7986
# Wrap any TypeError in a more general error
8087
raise TypeError("Bad Taxonomy data in database") from e
8188

82-
errors = _validate_metadata(taxonomy_entries, metadata)
89+
errors = _validate_metadata(taxonomy_entries, metadata, is_family_metadata)
8390
return errors if len(errors) > 0 else None
8491

8592

8693
def _validate_metadata(
87-
taxonomy_entries: Mapping[str, TaxonomyEntry], metadata: Mapping
94+
taxonomy_entries: Mapping[str, TaxonomyEntry],
95+
metadata: Mapping,
96+
is_family_metadata: bool = False,
8897
) -> MetadataValidationErrors:
8998
"""Validates the metadata against the taxonomy.
9099
91100
:param _type_ taxonomy_entries: The built entries from the
92101
CorpusType.valid_metadata.
93-
:param _type_ metadata: The metadata to validate.
102+
:param Mapping metadata: The metadata to validate.
103+
:param bool is_family_metadata: Whether to validate all metadata
104+
values as string arrays.
94105
:return MetadataValidationErrors: a list of errors if the metadata
95106
is invalid.
96107
"""
@@ -108,13 +119,22 @@ def _validate_metadata(
108119
for key, value_list in metadata.items():
109120
if key not in taxonomy_entries:
110121
continue # We've already checked for missing keys
111-
taxonomy_entry = taxonomy_entries[key]
122+
112123
if not isinstance(value_list, list):
113124
errors.append(
114125
f"Invalid value '{value_list}' for metadata key '{key}' expected list."
115126
)
116127
continue
117128

129+
# Ensure all items in value_list are strings
130+
if is_family_metadata and not all(isinstance(item, str) for item in value_list):
131+
errors.append(
132+
f"Invalid value(s) in '{value_list}' for metadata key '{key}', "
133+
"expected all items to be strings."
134+
)
135+
continue
136+
137+
taxonomy_entry = taxonomy_entries[key]
118138
if not taxonomy_entry.allow_any:
119139
if not all(item in taxonomy_entry.allowed_values for item in value_list):
120140
errors.append(f"Invalid value '{value_list}' for metadata key '{key}'")

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "db-client"
3-
version = "3.8.26"
3+
version = "3.8.27"
44
description = "All things to do with the datamodel and its storage. Including alembic migrations and datamodel code."
55
authors = ["CPR-dev-team <[email protected]>"]
66
license = "Apache-2.0"

tests/functions/test_metadata.py

+22
Original file line numberDiff line numberDiff line change
@@ -224,3 +224,25 @@ def test_validation_allows_any(db):
224224
errors = validate_metadata_against_taxonomy(taxonomy, metadata)
225225

226226
assert errors is None
227+
228+
229+
def test_validation_errors_on_non_string_values(db):
230+
taxonomy = {
231+
"author_type": {
232+
"allow_blanks": False,
233+
"allowed_values": ["Party", "Non-Party"],
234+
}
235+
}
236+
metadata = {"author_type": ["Party", 123, None]}
237+
setup_test(db, taxonomy, metadata)
238+
239+
errors = validate_metadata_against_taxonomy(
240+
taxonomy, metadata, is_family_metadata=True
241+
)
242+
243+
assert errors is not None
244+
assert len(errors) == 1
245+
assert errors[0] == (
246+
"Invalid value(s) in '['Party', 123, None]' for metadata key "
247+
"'author_type', expected all items to be strings."
248+
)

0 commit comments

Comments
 (0)