Skip to content

Commit

Permalink
#494 - Allow for polyA tail. Show polyA tail length, transcript/refer…
Browse files Browse the repository at this point in the history
…ence diffs on transcript version page
  • Loading branch information
davmlaw committed Oct 20, 2021
1 parent a787705 commit a1bb0b1
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 5 deletions.
44 changes: 41 additions & 3 deletions genes/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import re
import shutil
import types
from collections import namedtuple, defaultdict
from collections import namedtuple, defaultdict, Counter
from dataclasses import dataclass
from datetime import timedelta
from functools import total_ordering
Expand Down Expand Up @@ -673,7 +673,7 @@ def hgvs_ok(self) -> bool:
if self.transcript.annotation_consortium == AnnotationConsortium.REFSEQ:
if "partial" in self.data:
return False
return self.sequence_info.length == self.length
return bool(self.sequence_length_matches_exon_length_ignoring_poly_a_tail)
elif self.transcript.annotation_consortium == AnnotationConsortium.ENSEMBL:
return True

Expand All @@ -683,14 +683,52 @@ def hgvs_ok(self) -> bool:
def sequence_info(self):
return TranscriptVersionSequenceInfo.get(self.accession)

@property
def sequence_poly_a_tail(self) -> int:
""" Returns length of polyA tail if ALL bases after sum of exon lengths are A """
if self.sequence_info.length > self.length:
seq_end = self.sequence_info.sequence[self.length:]
if not seq_end.upper().replace("A", ""):
return len(seq_end)
return 0

@property
def cdna_match_diff(self) -> str:
""" Human readable """
match_summary = ""
if cdna_match := self.data.get("cdna_match"):
gap_operations = Counter()
for (_, _, _, _, gap) in cdna_match:
if gap:
for gap_op in gap.split():
code = gap_op[0]
length = int(gap_op[1:])
gap_operations[code] += length

if gap_operations:
gap_summary = []
for code, label in {"I": "Insertion", "D": "Deletion"}.items():
if value := gap_operations.get(code):
gap_summary.append(f"{value}bp {label}")
match_summary = ", ".join(gap_summary)
if match_summary:
match_summary = f"Transcript had {match_summary} vs genome reference"

return match_summary


@property
def sequence_length_matches_exon_length_ignoring_poly_a_tail(self) -> bool:
return self.sequence_info.length == self.length or self.sequence_poly_a_tail

@lazy
def alignment_gap(self) -> bool:
if self.transcript.annotation_consortium == AnnotationConsortium.REFSEQ:
# Sometimes RefSeq transcripts have gaps when aligning to the genome
# We've modified PyHGVS to be able to handle this
if "cdna_match" in self.data or "partial" in self.data:
return True
return self.sequence_info.length != self.length
return not self.sequence_length_matches_exon_length_ignoring_poly_a_tail

# Ensembl transcripts use genomic sequence so there is never any gap
return False
Expand Down
10 changes: 8 additions & 2 deletions genes/templates/genes/view_transcript_version.html
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ <h3>Transcript Version {{ accession }}</h3>
{% endif %}
</td>
<td>{{ tv_sequence_info.created|localtime }}</td>
<td>{{ tv_sequence_info.length }}</td>
<td>{{ sequence_length }}</td>
</tr>
</table>
{% else %}
Expand Down Expand Up @@ -75,7 +75,13 @@ <h3>Transcript Version {{ accession }}</h3>
<td>{{ genome_build_id }}</td>
<td><a class="hover-link" href="{% url 'view_gene' transcript_version.gene.pk %}">{{ transcript_version.gene }}</a></td>
<td>{{ transcript_version.biotype }}</td>
<td>{{ transcript_version.alignment_gap }}</td>
<td>
{% if transcript_version.alignment_gap %}
{{ transcript_version.cdna_match_diff }}
{% else %}
False
{% endif %}
</td>
<td>{{ transcript_version.hgvs_ok }}</td>
{% if transcript_version.has_valid_data %}
<td>{{ transcript_version.coordinates }}</td>
Expand Down
11 changes: 11 additions & 0 deletions genes/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,11 +394,15 @@ def view_transcript_version(request, transcript_id, version):
"no_transcript_message": no_transcript_message,
"version_count": version_count}

poly_a_tail = 0
if tv:
transcript_versions_by_build = {}
builds_missing_data = set()
alignment_gap = False

for tv in tv_set.order_by("genome_build__name"):
if tv_sequence_info:
poly_a_tail = max(poly_a_tail, tv.sequence_poly_a_tail)
genome_build_id = tv.genome_build.pk
alignment_gap |= tv.alignment_gap
transcript_versions_by_build[genome_build_id] = tv
Expand All @@ -422,6 +426,13 @@ def view_transcript_version(request, transcript_id, version):
"transcript_versions_by_build": transcript_versions_by_build,
"differences": differences,
"alignment_gap": alignment_gap}}

if tv_sequence_info:
if poly_a_tail:
sequence_length = f"{tv_sequence_info.length - poly_a_tail} (w/{poly_a_tail}bp polyA tail)"
else:
sequence_length = tv_sequence_info.length
context["sequence_length"] = sequence_length
return render(request, "genes/view_transcript_version.html", context)


Expand Down

0 comments on commit a1bb0b1

Please sign in to comment.