Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#2913] Check for duplicate keys in ZGW imports #1508

Merged
merged 1 commit into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 43 additions & 22 deletions src/open_inwoner/openzaak/import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,19 @@

class ZGWImportError(Exception):
@classmethod
def extract_error_data(cls, exc: Exception, jsonl: str):
exc_source = type(exc.__context__)
data = json.loads(jsonl) if jsonl else {}
def extract_error_data(
cls, exception: Exception | None = None, jsonl: str | None = None
):
data = json.loads(jsonl) if jsonl is not None else {}
source_config = apps.get_model(data["model"])

# error type
if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist:
error_type = ObjectDoesNotExist
if exc_source is source_config.MultipleObjectsReturned:
error_type = MultipleObjectsReturned
error_type = None
if exception:
exc_source = type(exception.__context__)
if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist:
error_type = ObjectDoesNotExist
if exc_source is source_config.MultipleObjectsReturned:
error_type = MultipleObjectsReturned

# metadata about source_config
items = []
Expand All @@ -60,8 +63,6 @@ def extract_error_data(cls, exc: Exception, jsonl: str):
items = [
f"omschrijving = {fields['omschrijving']!r}",
f"ZaakTypeConfig identificatie = {fields['zaaktype_config'][0]!r}",
f"Catalogus domein = {fields['zaaktype_config'][1]!r}",
f"Catalogus rsin = {fields['zaaktype_config'][2]!r}",
]

return {
Expand All @@ -71,8 +72,8 @@ def extract_error_data(cls, exc: Exception, jsonl: str):
}

@classmethod
def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:
error_data = cls.extract_error_data(exception, jsonl)
def from_exception_and_jsonl(cls, jsonl: str, exception: Exception) -> Self:
error_data = cls.extract_error_data(exception=exception, jsonl=jsonl)

error_template = (
"%(source_config_name)s not found in target environment: %(info)s"
Expand All @@ -82,16 +83,26 @@ def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:

return cls(error_template % error_data)

@classmethod
def from_jsonl(cls, jsonl: str) -> Self:
error_data = cls.extract_error_data(jsonl=jsonl)
error_template = (
"%(source_config_name)s was processed multiple times because it contains "
"duplicate natural keys: %(info)s"
)
return cls(error_template % error_data)


def check_catalogus_config_exists(source_config: CatalogusConfig, jsonl: str):
try:
CatalogusConfig.objects.get_by_natural_key(
domein=source_config.domein, rsin=source_config.rsin
)
except CatalogusConfig.MultipleObjectsReturned as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
except CatalogusConfig.DoesNotExist as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
except (
CatalogusConfig.DoesNotExist,
CatalogusConfig.MultipleObjectsReturned,
) as exc:
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)


def _update_config(source, target, exclude_fields):
Expand All @@ -113,10 +124,13 @@ def _update_zaaktype_config(source_config: ZaakTypeConfig, jsonl: str):
catalogus_domein=source_config.catalogus.domein,
catalogus_rsin=source_config.catalogus.rsin,
)
except ZaakTypeConfig.MultipleObjectsReturned as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
except (CatalogusConfig.DoesNotExist, ZaakTypeConfig.DoesNotExist) as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
except (
CatalogusConfig.DoesNotExist,
CatalogusConfig.MultipleObjectsReturned,
ZaakTypeConfig.DoesNotExist,
ZaakTypeConfig.MultipleObjectsReturned,
) as exc:
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)
else:
exclude_fields = [
"id",
Expand Down Expand Up @@ -146,7 +160,7 @@ def _update_nested_zgw_config(
catalogus_rsin=catalogus_rsin,
)
except (source_config.DoesNotExist, source_config.MultipleObjectsReturned) as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
raise ZGWImportError.from_exception_and_jsonl(exception=exc, jsonl=jsonl)
else:
_update_config(source_config, target, exclude_fields)

Expand Down Expand Up @@ -293,6 +307,7 @@ def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:

rows_successfully_processed = 0
import_errors = []
natural_keys_seen = set()
for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
try:
(deserialized_object,) = serializers.deserialize(
Expand All @@ -302,11 +317,17 @@ def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:
use_natural_foreign_keys=True,
)
except DeserializationError as exc:
error = ZGWImportError.from_exception_and_jsonl(exc, line)
error = ZGWImportError.from_exception_and_jsonl(
exception=exc, jsonl=line
)
logger.error(error)
import_errors.append(error)
else:
source_config = deserialized_object.object
if (natural_key := source_config.natural_key()) in natural_keys_seen:
import_errors.append(ZGWImportError.from_jsonl(line))
continue
natural_keys_seen.add(natural_key)
try:
match source_config:
case CatalogusConfig():
Expand Down
10 changes: 5 additions & 5 deletions src/open_inwoner/openzaak/tests/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,18 +293,18 @@ def test_import_flow_reports_errors(self) -> None:
messages[1],
)
self.assertIn(
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'status omschrijving', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0'",
messages[1],
)
self.assertIn(
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', ZaakTypeConfig "
"identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
"identificatie = 'ztc-id-a-0'",
messages[1],
)
self.assertIn(
"ZaakTypeInformatieObjectTypeConfig not found in target environment: omschrijving = 'informatieobject', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
"ZaakTypeConfig identificatie = 'ztc-id-a-0'",
messages[1],
)
self.assertIn(
Expand All @@ -313,7 +313,7 @@ def test_import_flow_reports_errors(self) -> None:
)
self.assertIn(
"ZaakTypeResultaatTypeConfig not found in target environment: omschrijving = 'resultaat', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'",
"ZaakTypeConfig identificatie = 'ztc-id-a-0'",
messages[1],
)

Expand Down
45 changes: 41 additions & 4 deletions src/open_inwoner/openzaak/tests/test_import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,11 @@ def setUp(self):
'{"model": "openzaak.zaaktypestatustypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "statustype_url": "https://bar.maykinmedia.nl", "omschrijving": "status omschrijving", "statustekst": "statustekst nieuw", "zaaktype_uuids": "[]", "status_indicator": "", "status_indicator_text": "", "document_upload_description": "", "description": "status", "notify_status_change": true, "action_required": false, "document_upload_enabled": true, "call_to_action_url": "", "call_to_action_text": "", "case_link_text": ""}}',
'{"model": "openzaak.zaaktyperesultaattypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "resultaattype_url": "https://bar.maykinmedia.nl", "omschrijving": "resultaat", "zaaktype_uuids": "[]", "description": "description new"}}',
]
self.json_dupes = [
'{"model": "openzaak.zaaktypestatustypeconfig", "fields": {"zaaktype_config": ["ztc-id-a-0", "DM-0", "123456789"], "statustype_url": "https://bar.maykinmedia.nl", "omschrijving": "status omschrijving", "statustekst": "statustekst nieuw", "zaaktype_uuids": "[]", "status_indicator": "", "status_indicator_text": "", "document_upload_description": "", "description": "status", "notify_status_change": true, "action_required": false, "document_upload_enabled": true, "call_to_action_url": "", "call_to_action_text": "", "case_link_text": ""}}',
]
self.jsonl = "\n".join(self.json_lines)
self.jsonl_with_dupes = "\n".join(self.json_lines + self.json_dupes)

def test_import_jsonl_update_success(self):
mocks = ZGWExportImportMockData()
Expand Down Expand Up @@ -373,7 +377,7 @@ def test_import_jsonl_missing_statustype_config(self):
)
expected_error = ZGWImportError(
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'bogus', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'"
"ZaakTypeConfig identificatie = 'ztc-id-a-0'"
)
import_expected = dataclasses.asdict(
CatalogusConfigImport(
Expand Down Expand Up @@ -419,7 +423,7 @@ def test_import_jsonl_update_statustype_config_missing_zt_config(self):
)
expected_error = ZGWImportError(
"ZaakTypeStatusTypeConfig not found in target environment: omschrijving = 'status omschrijving', "
"ZaakTypeConfig identificatie = 'bogus', Catalogus domein = 'DM-1', Catalogus rsin = '666666666'"
"ZaakTypeConfig identificatie = 'bogus'"
)
import_expected = dataclasses.asdict(
CatalogusConfigImport(
Expand All @@ -445,7 +449,7 @@ def test_import_jsonl_update_statustype_config_missing_zt_config(self):
self.assertEqual(ZaakTypeStatusTypeConfig.objects.count(), 1)
self.assertEqual(ZaakTypeResultaatTypeConfig.objects.count(), 1)

def test_import_jsonl_update_reports_duplicates(self):
def test_import_jsonl_update_reports_duplicate_db_records(self):
mocks = ZGWExportImportMockData()

ZaakTypeResultaatTypeConfigFactory(
Expand All @@ -465,7 +469,7 @@ def test_import_jsonl_update_reports_duplicates(self):
)
expected_error = ZGWImportError(
"Got multiple results for ZaakTypeResultaatTypeConfig: omschrijving = 'resultaat', "
"ZaakTypeConfig identificatie = 'ztc-id-a-0', Catalogus domein = 'DM-0', Catalogus rsin = '123456789'"
"ZaakTypeConfig identificatie = 'ztc-id-a-0'"
)
import_expected = dataclasses.asdict(
CatalogusConfigImport(
Expand All @@ -484,6 +488,39 @@ def test_import_jsonl_update_reports_duplicates(self):
# check import
self.assertEqual(import_result, import_expected)

def test_import_jsonl_update_reports_duplicate_natural_keys_in_upload_file(self):
mocks = ZGWExportImportMockData()

self.storage.save("import.jsonl", io.StringIO(self.jsonl_with_dupes))

# we use `asdict` and replace the Exceptions with string representations
# because for Exceptions raised from within dataclasses, equality ==/is identity
import_result = dataclasses.asdict(
CatalogusConfigImport.import_from_jsonl_file_in_django_storage(
"import.jsonl", self.storage
)
)
expected_error = ZGWImportError(
"ZaakTypeStatusTypeConfig was processed multiple times because it contains duplicate "
"natural keys: omschrijving = 'status omschrijving', ZaakTypeConfig identificatie = 'ztc-id-a-0'"
)
import_expected = dataclasses.asdict(
CatalogusConfigImport(
total_rows_processed=6,
catalogus_configs_imported=1,
zaaktype_configs_imported=1,
zaak_informatie_object_type_configs_imported=1,
zaak_status_type_configs_imported=1,
zaak_resultaat_type_configs_imported=1,
import_errors=[expected_error],
),
)
import_result["import_errors"][0] = str(import_result["import_errors"][0])
import_expected["import_errors"][0] = str(import_expected["import_errors"][0])

# check import
self.assertEqual(import_result, import_expected)

def test_import_jsonl_fails_with_catalogus_domein_rsin_mismatch(self):
service = ServiceFactory(slug="service-0")
CatalogusConfigFactory(
Expand Down
Loading