Skip to content

Commit

Permalink
Merge pull request #40 from ImageMarkup/isic-36-add-hierarchical-diagnosis-field
Browse files Browse the repository at this point in the history
  • Loading branch information
danlamanna authored Oct 2, 2024
2 parents 9296a1a + 0201f6f commit e83d9c0
Show file tree
Hide file tree
Showing 7 changed files with 1,000 additions and 33 deletions.
7 changes: 7 additions & 0 deletions isic_metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ class Field:
es_facet={"terms": {"field": "diagnosis", "size": 100}},
)
),
"legacy_dx": Field(
search=SearchConfig(
key="legacy_dx",
es_property={"type": "keyword"},
es_facet={"terms": {"field": "legacy_dx", "size": 100}},
)
),
"mel_thick_mm": Field(
search=SearchConfig(
key="mel_thick_mm",
Expand Down
922 changes: 922 additions & 0 deletions isic_metadata/diagnosis_hierarchical.py

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion isic_metadata/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import re
from typing import Any

from isic_metadata.diagnosis_hierarchical import DiagnosisEnum # noqa: F401


class ClinSizeLongDiamMm:
@classmethod
Expand Down Expand Up @@ -58,7 +60,7 @@ class DiagnosisConfirmTypeEnum(str, Enum):
single_contributor_clinical_assessment = "single contributor clinical assessment"


class DiagnosisEnum(str, Enum):
class LegacyDxEnum(str, Enum):
actinic_keratosis = "actinic keratosis"
adnexal_tumor = "adnexal tumor"
aimp = "AIMP"
Expand Down
23 changes: 14 additions & 9 deletions isic_metadata/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
DiagnosisEnum,
FitzpatrickSkinType,
ImageTypeEnum,
LegacyDxEnum,
MelClassEnum,
MelMitoticIndexEnum,
MelThickMm,
Expand Down Expand Up @@ -204,7 +205,14 @@ class MetadataRow(BaseModel):
sex: Literal["male", "female"] | None = None
anatom_site_general: AnatomSiteGeneralEnum | None = None
benign_malignant: BenignMalignantEnum | None = None
diagnosis: DiagnosisEnum | None = None
diagnosis: (
Annotated[
DiagnosisEnum,
BeforeValidator(DiagnosisEnum.accept_terminal_values),
]
| None
) = None
legacy_dx: LegacyDxEnum | None = None
diagnosis_confirm_type: DiagnosisConfirmTypeEnum | None = None
personal_hx_mm: bool | None = None
family_hx_mm: bool | None = None
Expand Down Expand Up @@ -298,8 +306,8 @@ def validate_no_benign_melanoma(self) -> MetadataRow:
if not self.benign_malignant:
return self

if (self.diagnosis == "melanoma" and self.benign_malignant == "benign") or (
self.diagnosis == "nevus"
if (DiagnosisEnum.is_melanoma(self.diagnosis) and self.benign_malignant == "benign") or (
DiagnosisEnum.is_nevus(self.diagnosis)
and self.benign_malignant
not in [
BenignMalignantEnum.benign,
Expand All @@ -324,10 +332,7 @@ def validate_non_nevus_diagnoses(self) -> MetadataRow:
if not self.diagnosis:
raise error_missing_field("nevus_type", "diagnosis")

if self.diagnosis not in [
DiagnosisEnum.nevus,
DiagnosisEnum.nevus_spilus,
]:
if not DiagnosisEnum.is_nevus(self.diagnosis):
raise error_incompatible_fields("nevus_type", "diagnosis", field2_value=self.diagnosis)

return self
Expand All @@ -347,9 +352,9 @@ def validate_melanoma_fields(self) -> MetadataRow:
continue

if not self.diagnosis:
raise error_missing_field(field, "diagnosis", field2_value="melanoma")
raise error_missing_field(field, "diagnosis", field2_value=self.diagnosis)

if self.diagnosis != "melanoma":
if not DiagnosisEnum.is_melanoma(self.diagnosis):
raise error_incompatible_fields(
field, "diagnosis", field2_value=self.diagnosis.value
)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
def test_batch():
MetadataBatch(
items=[
MetadataRow.model_validate({"diagnosis": "melanoma"}),
MetadataRow.model_validate({"diagnosis": "melanoma"}),
MetadataRow.model_validate({"sex": "male"}),
MetadataRow.model_validate({"sex": "male"}),
]
)

Expand Down
20 changes: 11 additions & 9 deletions tests/test_dependent_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,29 @@
from pydantic import ValidationError
import pytest

from isic_metadata.diagnosis_hierarchical import DiagnosisEnum
from isic_metadata.metadata import MetadataRow


def test_diagnosis_no_benign_melanoma():
@pytest.mark.parametrize(("melanoma_diagnosis"), DiagnosisEnum._melanoma_diagnoses())
def test_diagnosis_no_benign_melanoma(melanoma_diagnosis: str):
with pytest.raises(ValidationError) as excinfo:
MetadataRow.model_validate({"diagnosis": "melanoma", "benign_malignant": "benign"})
MetadataRow.model_validate({"diagnosis": melanoma_diagnosis, "benign_malignant": "benign"})
assert len(excinfo.value.errors()) == 1
assert "melanoma is incompatible with benign_malignant" in excinfo.value.errors()[0]["msg"]
assert " is incompatible with benign_malignant" in excinfo.value.errors()[0]["msg"]


@pytest.mark.parametrize("benign_malignant", ["malignant", "indeterminate/malignant"])
def test_diagnosis_no_malignant_nevus(benign_malignant: str):
with pytest.raises(ValidationError) as excinfo:
MetadataRow.model_validate({"diagnosis": "nevus", "benign_malignant": benign_malignant})
MetadataRow.model_validate({"diagnosis": "Nevus", "benign_malignant": benign_malignant})
assert len(excinfo.value.errors()) == 1
assert "nevus is incompatible with benign_malignant" in excinfo.value.errors()[0]["msg"]
assert " is incompatible with benign_malignant" in excinfo.value.errors()[0]["msg"]


@pytest.mark.parametrize(
("diagnosis", "error_message"),
[(None, "requires setting diagnosis"), ("melanoma", "is incompatible with diagnosis")],
[(None, "requires setting diagnosis"), ("Melanoma Invasive", "is incompatible with diagnosis")],
)
def test_nevus_type_needs_nevus_diagnosis(diagnosis: str | None, error_message: str):
with pytest.raises(ValidationError) as excinfo:
Expand All @@ -32,7 +34,7 @@ def test_nevus_type_needs_nevus_diagnosis(diagnosis: str | None, error_message:
assert f"nevus_type {error_message}" in excinfo.value.errors()[0]["msg"]


@pytest.mark.parametrize("diagnosis", [None, "basal cell carcinoma"])
@pytest.mark.parametrize("diagnosis", [None, "Basal cell carcinoma"])
@pytest.mark.parametrize(
("field_name", "field_value"),
[
Expand All @@ -51,7 +53,7 @@ def test_melanoma_fields_require_melanoma_diagnosis(
assert len(excinfo.value.errors()) == 1
assert field_name in excinfo.value.errors()[0]["msg"]

MetadataRow.model_validate({field_name: field_value, "diagnosis": "melanoma"})
MetadataRow.model_validate({field_name: field_value, "diagnosis": "Melanoma Invasive"})


@pytest.mark.skip("TODO: https://github.com/ImageMarkup/tracker/issues/141")
Expand All @@ -62,7 +64,7 @@ def test_diagnosis_confirm_type_requires_diagnosis():
assert excinfo.value.errors()[0]["loc"][0] == "diagnosis_confirm_type"

MetadataRow.model_validate(
{"diagnosis": "melanoma", "diagnosis_confirm_type": "histopathology"}
{"diagnosis": "Melanoma Invasive", "diagnosis_confirm_type": "histopathology"}
)


Expand Down
53 changes: 41 additions & 12 deletions tests/test_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pydantic import ValidationError
import pytest

from isic_metadata.diagnosis_hierarchical import DiagnosisEnum
from isic_metadata.metadata import MetadataRow, convert_errors


Expand All @@ -17,8 +18,8 @@
("age", "54", 54, {}),
("melanocytic", "True", True, {}),
("clin_size_long_diam_mm", "4mm", Decimal("4.0"), {}),
("mel_thick_mm", ".33mm", Decimal("0.33"), {"diagnosis": "melanoma"}),
("mel_ulcer", "false", False, {"diagnosis": "melanoma"}),
("mel_thick_mm", ".33mm", Decimal("0.33"), {"diagnosis": "Melanoma Invasive"}),
("mel_ulcer", "false", False, {"diagnosis": "Melanoma Invasive"}),
("family_hx_mm", "False", False, {}),
("personal_hx_mm", "0", False, {}),
("acquisition_day", "142", 142, {}),
Expand All @@ -35,26 +36,27 @@ def test_non_str_types(

@pytest.mark.parametrize(("emptyish_value"), ["", " ", "\t", None])
def test_empty_fields_are_omitted(emptyish_value: Any):
metadata = MetadataRow.model_validate({"diagnosis": "melanoma", "mel_type": emptyish_value})
assert metadata.diagnosis == "melanoma"
metadata = MetadataRow.model_validate({"diagnosis": "Benign", "mel_type": emptyish_value})
assert metadata.diagnosis == "Benign"
assert metadata.mel_thick_mm is None


def test_unstructured_fields():
metadata = MetadataRow.model_validate({"diagnosis": "melanoma", "hello": "world"})
assert metadata.diagnosis == "melanoma"
metadata = MetadataRow.model_validate({"diagnosis": "Benign", "hello": "world"})
assert metadata.diagnosis == "Benign"
assert metadata.unstructured["hello"] == "world"


def test_melanoma_fields():
@pytest.mark.parametrize(("melanoma_diagnosis"), DiagnosisEnum._melanoma_diagnoses())
def test_melanoma_fields(melanoma_diagnosis: str):
with pytest.raises(ValidationError) as excinfo:
# mel_class can only be set if diagnosis is melanoma
MetadataRow.model_validate({"diagnosis": "angioma", "mel_class": "invasive melanoma"})
MetadataRow.model_validate({"diagnosis": "Benign", "mel_class": "invasive melanoma"})
assert len(excinfo.value.errors()) == 1
assert "mel_class is incompatible with diagnosis" in excinfo.value.errors()[0]["msg"]

# mel_class can only be set if diagnosis is melanoma
MetadataRow.model_validate({"diagnosis": "melanoma", "mel_class": "invasive melanoma"})
MetadataRow.model_validate({"diagnosis": melanoma_diagnosis, "mel_class": "invasive melanoma"})


@given(age=st.integers(min_value=0).map(str))
Expand All @@ -76,8 +78,9 @@ def test_benign_malignant():
MetadataRow.model_validate({"benign_malignant": "benign"})


def test_nevus_diagnosis():
MetadataRow.model_validate({"diagnosis": "nevus", "nevus_type": "blue"})
@pytest.mark.parametrize(("nevus_diagnosis"), DiagnosisEnum._nevus_diagnoses())
def test_nevus_diagnosis(nevus_diagnosis: str):
MetadataRow.model_validate({"diagnosis": nevus_diagnosis, "nevus_type": "blue"})


@pytest.mark.parametrize(
Expand All @@ -91,7 +94,7 @@ def test_nevus_diagnosis():
],
)
def test_mel_thick_mm(raw: str, parsed: float):
metadata = MetadataRow.model_validate({"diagnosis": "melanoma", "mel_thick_mm": raw})
metadata = MetadataRow.model_validate({"diagnosis": "Melanoma Invasive", "mel_thick_mm": raw})
assert metadata.mel_thick_mm == parsed


Expand Down Expand Up @@ -121,3 +124,29 @@ def test_clin_size_long_diam_mm_invalid():
MetadataRow.model_validate({"clin_size_long_diam_mm": "foo"})
assert len(excinfo.value.errors()) == 1
assert "Unable to parse value as a number" in convert_errors(excinfo.value)[0]["msg"]


@pytest.mark.parametrize(
    ("raw", "parsed"),
    [
        ("Benign", "Benign"),
        ("Benign - Other", "Benign:Benign - Other"),
        ("Blue nevus", "Benign:Benign melanocytic proliferations:Nevus:Blue nevus"),
        (
            "Squamous cell carcinoma, NOS",
            "Malignant:Malignant epidermal proliferations:Squamous cell carcinoma, NOS",
        ),
        (
            "Blue nevus, Sclerosing",
            "Benign:Benign melanocytic proliferations:Nevus:Blue nevus:Blue nevus, Sclerosing",
        ),
    ],
)
def test_diagnosis(raw, parsed):
    """Validate that a submitted diagnosis label resolves to the expected value.

    Each case feeds ``raw`` in as the ``diagnosis`` field and expects the
    validated row to compare equal to ``parsed``, the colon-delimited path
    ending in that label (a top-level label such as "Benign" maps to itself).
    """
    row = MetadataRow.model_validate({"diagnosis": raw})
    resolved = row.diagnosis
    # Surface the value that was actually produced when the comparison fails.
    assert resolved == parsed, str(resolved)


def test_diagnosis_enum_has_unique_terminal_values():
    """Check that no two ``DiagnosisEnum`` members share a terminal label.

    The terminal label is the text after the last ``:`` in a member's value.
    NOTE(review): uniqueness presumably matters because a bare terminal label
    is accepted as diagnosis input and must identify a single member --
    confirm against ``DiagnosisEnum.accept_terminal_values``.
    """
    # Local import keeps this block self-contained; only the stdlib is needed.
    from collections import Counter

    terminal_counts = Counter(member.value.rsplit(":", 1)[-1] for member in DiagnosisEnum)
    duplicates = sorted(label for label, count in terminal_counts.items() if count > 1)
    # Name the colliding labels so a failure is actionable without a debugger.
    assert not duplicates, f"duplicate terminal diagnosis values: {duplicates}"

0 comments on commit e83d9c0

Please sign in to comment.