Skip to content

Commit

Permalink
issue #1099 - store hgvs converter used during matching process (on t…
Browse files Browse the repository at this point in the history
…op of note)
  • Loading branch information
davmlaw committed Jul 5, 2024
1 parent 0c50354 commit 5a9ecfd
Show file tree
Hide file tree
Showing 11 changed files with 199 additions and 46 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.10 on 2024-07-04 08:06

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Add a nullable ``hgvs_converter_version`` foreign key to ``ImportedAlleleInfo``."""

    dependencies = [
        (
            'classification',
            '0144_one_off_populate_historical_resolved_variant_hgvsconverter_version',
        ),
    ]

    operations = [
        migrations.AddField(
            model_name='importedalleleinfo',
            name='hgvs_converter_version',
            # Nullable so existing rows need no value when the column is added.
            field=models.ForeignKey(
                to='classification.hgvsconverterversion',
                on_delete=models.PROTECT,
                null=True,
                blank=True,
            ),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.10 on 2024-07-05 03:02

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the optional ``used_converter_type`` text column to ``HGVSConverterVersion``."""

    dependencies = [
        ('classification', '0145_importedalleleinfo_hgvs_converter_version'),
    ]

    operations = [
        migrations.AddField(
            model_name='hgvsconverterversion',
            name='used_converter_type',
            # Nullable so existing rows need no value when the column is added.
            field=models.TextField(null=True, blank=True),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Generated by Django 4.2.10 on 2024-07-05 03:02

from django.db import migrations
from django.db.models import F, Q, Min

from genes.hgvs import HGVSConverterType


def _one_off_populate_hgvsconverterversion_used_converter_type(apps, schema_editor):
    """Populate ``used_converter_type`` from the ``method`` text, then de-duplicate.

    Steps:
      1. Derive ``used_converter_type`` from the prefix of each row's ``method``.
      2. Collapse ``HGVSConverterVersion`` rows that agree on every field in
         ``unique_fields`` (i.e. ignoring ``method``): the lowest-id row of each
         group is kept, ``ResolvedVariantInfo`` rows are re-pointed at it, and the
         remaining rows are deleted.
      3. Point every ``ImportedAlleleInfo`` at the first converter version whose
         ``version`` starts with 'Legacy', if such a row exists.

    Args:
        apps: historical app registry passed in by ``migrations.RunPython``.
        schema_editor: unused; part of the ``RunPython`` callable signature.
    """
    HGVSConverterVersion = apps.get_model('classification', 'HGVSConverterVersion')
    ResolvedVariantInfo = apps.get_model('classification', 'ResolvedVariantInfo')
    ImportedAlleleInfo = apps.get_model('classification', 'ImportedAlleleInfo')

    # "Internally converted..." means the configured converter did the work itself
    internal_qs = HGVSConverterVersion.objects.filter(method__startswith='Internally converted')
    internal_qs.update(used_converter_type=F("hgvs_converter_type"))
    clingen_qs = HGVSConverterVersion.objects.filter(method__startswith='ClinGen Allele Registry')
    clingen_qs.update(used_converter_type=HGVSConverterType.CLINGEN_ALLELE_REGISTRY.name)

    # Method text may start with either str(enum member) or just the member's name
    for converter_type in HGVSConverterType:
        qs = HGVSConverterVersion.objects.filter(Q(method__startswith=str(converter_type)) | Q(method__startswith=converter_type.name))
        qs.update(used_converter_type=converter_type.name)

    # Legacy ones were 'unknown tool...' - ok to have these as NULL

    # We should probably remove dupes too (not including method)
    unique_fields = ("hgvs_converter_type", "version", "used_converter_type", "code_git_hash")
    # One row per distinct combination of unique_fields, carrying the lowest id in that group
    first_hcvs = HGVSConverterVersion.objects.values(*unique_fields).annotate(first_id=Min('id'))
    kept_ids = set(first_hcvs.values_list("first_id", flat=True))

    # The only object that should have it now is ResolvedVariantInfo
    # Re-point references from soon-to-be-deleted duplicates onto the kept row
    for hcvs in HGVSConverterVersion.objects.filter(pk__in=kept_ids):
        kwargs = {
            f"c_hgvs_converter_version__{field}": getattr(hcvs, field) for field in unique_fields
        }
        ResolvedVariantInfo.objects.filter(**kwargs).update(c_hgvs_converter_version=hcvs)

    # QuerySet.delete() returns (total_deleted, {model_label: count}). That tuple is
    # always truthy, so unpack the count rather than testing the tuple itself.
    num_deleted, _ = HGVSConverterVersion.objects.exclude(pk__in=kept_ids).delete()
    if num_deleted:
        print(f"Deleted {num_deleted} dupe HGVSConverterVersion objects")

    # Now we need to assign all AlleleInfo to hgvs_converter_version
    if legacy := HGVSConverterVersion.objects.filter(version__startswith='Legacy').first():
        ImportedAlleleInfo.objects.all().update(hgvs_converter_version=legacy)



class Migration(migrations.Migration):
    """Run the one-off data migration that fills ``used_converter_type``."""

    dependencies = [
        (
            'classification',
            '0146_hgvsconverterversion_used_converter_type',
        ),
    ]

    # No reverse_code is given, so this migration cannot be unapplied.
    operations = [
        migrations.RunPython(
            code=_one_off_populate_hgvsconverterversion_used_converter_type,
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by Django 4.2.10 on 2024-07-05 06:46

from django.db import migrations


class Migration(migrations.Migration):
    """Set the ``HGVSConverterVersion`` uniqueness constraint and drop ``method``."""

    dependencies = [
        (
            'classification',
            '0147_one_off_populate_hgvsconverterversion_used_converter_type',
        ),
    ]

    operations = [
        # Uniqueness is defined over these four columns
        migrations.AlterUniqueTogether(
            name='hgvsconverterversion',
            unique_together={
                (
                    'hgvs_converter_type',
                    'version',
                    'used_converter_type',
                    'code_git_hash',
                ),
            },
        ),
        migrations.RemoveField(
            model_name='hgvsconverterversion',
            name='method',
        ),
    ]
36 changes: 26 additions & 10 deletions classification/models/classification_variant_info_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,26 +52,29 @@
class HGVSConverterVersion(TimeStampedModel):
hgvs_converter_type = models.TextField() # HGVSConverterType
version = models.TextField()
method = models.TextField() # Records e.g. fall back to ClinGen
used_converter_type = models.TextField(blank=True, null=True) # HGVSConverterType
code_git_hash = models.TextField()

class Meta:
unique_together = ("hgvs_converter_type", "version", "used_converter_type", "code_git_hash")

@property
def converted_using_library(self) -> bool:
return self.method.startswith("Internally converted using library")
return self.hgvs_converter_type == self.used_converter_type

def __str__(self) -> str:
desc = f"{self.hgvs_converter_type} version=\"{self.version}\""
if not self.converted_using_library:
desc += f" (method={self.method})"
if self.used_converter_type and not self.converted_using_library:
desc += f" (used_converter_type={self.used_converter_type})"
if self.code_git_hash != "not-a-real-git-hash":
desc += f" git@{self.code_git_hash}"
return desc

@staticmethod
def get(hgvs_converter_type: HGVSConverterType, version: str, method: str):
def get(hgvs_converter_type: HGVSConverterType, version: str, used_converter_type: HGVSConverterType):
return HGVSConverterVersion.objects.get_or_create(hgvs_converter_type=hgvs_converter_type.name,
version=version,
method=method,
used_converter_type=used_converter_type.name,
code_git_hash=get_cached_project_git_hash())[0]


Expand Down Expand Up @@ -161,13 +164,14 @@ def set_variant_and_save(self, variant: Variant) -> 'ResolvedVariantInfo':
version = hgvs_matcher.hgvs_converter.get_version()

try:
hgvs_variant, method = hgvs_matcher.variant_to_hgvs_variant_and_method(variant, imported_transcript)
hgvs_variant, used_converter_type, method = hgvs_matcher.variant_to_hgvs_variant_used_converter_type_and_method(variant, imported_transcript)
c_hgvs = hgvs_variant.format()
c_hgvs_obj = CHGVS(c_hgvs)
self.c_hgvs = c_hgvs
self.c_hgvs_compat = hgvs_variant.format(use_compat=True,
max_ref_length=settings.CLASSIFICATION_MAX_REFERENCE_LENGTH)
self.c_hgvs_converter_version = HGVSConverterVersion.get(hgvs_converter_type, version=version, method=method)
self.c_hgvs_converter_version = HGVSConverterVersion.get(hgvs_converter_type, version=version,
used_converter_type=used_converter_type)
self.transcript_version = c_hgvs_obj.transcript_version_model(genome_build=genome_build)
self.gene_symbol = GeneSymbol.objects.filter(symbol=c_hgvs_obj.gene_symbol).first()
except Exception as exception:
Expand Down Expand Up @@ -373,6 +377,7 @@ class CalculatedVariantCoordinate:
variant_coordinate: Optional[VariantCoordinate]
genome_build: GenomeBuild
message: str
hgvs_converter_version: Optional[HGVSConverterVersion]

@property
def variant_coordinate_str(self) -> Optional[str]:
Expand Down Expand Up @@ -406,6 +411,9 @@ class ImportedAlleleInfo(TimeStampedModel):

imported_g_hgvs = TextField(null=True, blank=True)

hgvs_converter_version = ForeignKey(HGVSConverterVersion, null=True, blank=True, on_delete=PROTECT)
""" Tool used to resolve hgvs """

imported_transcript = TextField(null=True, blank=True)
"""
Only needed if we're using g.hgvs
Expand Down Expand Up @@ -680,17 +688,23 @@ def resolved_builds(self) -> list[ResolvedVariantInfo]:
def calculate_variant_coordinate(self) -> CalculatedVariantCoordinate:
vc: Optional[VariantCoordinate] = None
genome_build: Optional[GenomeBuild] = None
message: str
hgvs_converter_version: Optional[HGVSConverterVersion] = None
try:
genome_build = self.imported_genome_build_patch_version.genome_build
use_hgvs = self.imported_c_hgvs or self.imported_g_hgvs
hgvs_matcher = HGVSMatcher(genome_build)
hgvs_converter_type = hgvs_matcher.hgvs_converter.get_hgvs_converter_type()
version = hgvs_matcher.hgvs_converter.get_version()

vc_extra = hgvs_matcher.get_variant_coordinate_used_transcript_kind_method_and_matches_reference(use_hgvs)
message = f"HGVS matched by \"{vc_extra.method}\""
hgvs_converter_version = HGVSConverterVersion.get(hgvs_converter_type, version=version,
used_converter_type=vc_extra.used_converter_type)
vc = vc_extra.variant_coordinate
except Exception as ex:
message = str(ex)
return CalculatedVariantCoordinate(variant_coordinate=vc, genome_build=genome_build, message=message)
return CalculatedVariantCoordinate(variant_coordinate=vc, genome_build=genome_build,
message=message, hgvs_converter_version=hgvs_converter_version)

def update_variant_coordinate(self):
""" returns if a valid variant_coordinate could be derived """
Expand All @@ -700,6 +714,7 @@ def update_variant_coordinate(self):
# but it's better to do that in the validation step
cvc = self.calculate_variant_coordinate()
self.message = cvc.message
self.hgvs_converter_version = cvc.hgvs_converter_version
self.variant_coordinate = cvc.variant_coordinate_str
if not cvc.is_valid:
self.status = ImportedAlleleInfoStatus.FAILED
Expand Down Expand Up @@ -779,6 +794,7 @@ def set_matching_failed(self, message: Optional[str] = None):
def hard_reset_matching_info(self):
self.status = ImportedAlleleInfoStatus.PROCESSING
self.matched_variant = None
self.hgvs_converter_version = None
self.allele = None
for genome_build in [GenomeBuild.grch37(), GenomeBuild.grch38()]:
self._update_variant(genome_build=genome_build, variant=None)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
<h4>Imported Allele Info ({{ allele_info.pk }})</h4>
{% endif %}

{% labelled admin_only=True label="Created" value_css="text-monospace" %}{{ allele_info.created }}{% endlabelled %}
{% labelled admin_only=True label="Modified" value_css="text-monospace" %}{{ allele_info.modified }}{% endlabelled %}

{% comment %}
{% for diff_text in c_hgvses %}
{% labelled label=diff_text.identifier value_css="text-monospace" %}{{ diff_text.html }}{% endlabelled %}
Expand All @@ -17,6 +20,7 @@ <h4>Imported Allele Info ({{ allele_info.pk }})</h4>
{% else %}
{% labelled label=c_hgvses.0.identifier value_css="text-monospace" %}{{ c_hgvses.0.html }}{% endlabelled %}
{% endif %}
{% labelled admin_only=True label="HGVS matched by" value_css="text-monospace" %}{{ allele_info.hgvs_converter_version }}{% endlabelled %}
{% if normalized_diff %}
{% labelled label="Normalised Diffs" label_css="text-muted" value_css="text-muted" %}<i class="fa-solid fa-arrows-up-down"></i> {{ normalized_diff|separator:', ' }}{% endlabelled %}
{% endif %}
Expand Down
3 changes: 3 additions & 0 deletions genes/hgvs/hgvs_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ class HGVSConverterType(Enum):
COMBO = 3
CLINGEN_ALLELE_REGISTRY = 4 # This is not a full implementation just enough for HGVS tester tool

def is_internal_type(self) -> bool:
    """Return True when this converter type is PYHGVS or BIOCOMMONS_HGVS."""
    internal_types = (HGVSConverterType.PYHGVS, HGVSConverterType.BIOCOMMONS_HGVS)
    return self in internal_types


class HgvsMatchRefAllele:
def __init__(self, provided_ref: str, calculated_ref: str):
Expand Down
Loading

0 comments on commit 5a9ecfd

Please sign in to comment.