From 1c3d4a606190a79544fe23864435bc795734895c Mon Sep 17 00:00:00 2001 From: Christine Lo Date: Wed, 19 Aug 2015 08:07:21 -0700 Subject: [PATCH 1/6] validate number of columns in refgene file --- pyhgvs/utils.py | 6 ++++++ setup.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py index b79e6a4..a2b5b28 100644 --- a/pyhgvs/utils.py +++ b/pyhgvs/utils.py @@ -4,6 +4,8 @@ from itertools import imap +from django.core.exceptions import ValidationError + from .models import Exon from .models import Position from .models import Transcript @@ -39,6 +41,10 @@ def read_refgene(infile): if line.startswith('#'): continue row = line.rstrip('\n').split('\t') + if len(row) != 16: + raise ValidationError( + 'File has incorrect number of columns ' + 'in at least one line.', code='invalid') # Skip trailing , exon_starts = map(int, row[9].split(',')[:-1]) diff --git a/setup.py b/setup.py index 7d7d01b..7bb2e0d 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ def main(): setup( name='pyhgvs', - version='0.9.5', + version='0.9.6', description='HGVS name parsing and formatting', long_description=description, author='Matt Rasmussen', From d26d44aff1e3810698bb5b718d9ffd3a2e2ac5f9 Mon Sep 17 00:00:00 2001 From: Christine Lo Date: Wed, 19 Aug 2015 09:38:35 -0700 Subject: [PATCH 2/6] use ValueError --- pyhgvs/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py index a2b5b28..b6f6c14 100644 --- a/pyhgvs/utils.py +++ b/pyhgvs/utils.py @@ -4,8 +4,6 @@ from itertools import imap -from django.core.exceptions import ValidationError - from .models import Exon from .models import Position from .models import Transcript @@ -42,7 +40,7 @@ def read_refgene(infile): continue row = line.rstrip('\n').split('\t') if len(row) != 16: - raise ValidationError( + raise ValueError( 'File has incorrect number of columns ' 'in at least one line.', code='invalid') From 08f43857b1f491434473ed8c75b418a70388ac89 Mon Sep 17 00:00:00 2001 From: Christine Lo Date: Wed, 19 Aug 2015 09:39:45 -0700 Subject: [PATCH 3/6] keep start 0-based --- pyhgvs/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py index b6f6c14..ac7cdd6 100644 --- a/pyhgvs/utils.py +++ b/pyhgvs/utils.py @@ -51,7 +51,7 @@ def read_refgene(infile): yield { 'chrom': row[2], - 'start': int(row[4]) + 1, + 'start': int(row[4]), 'end': int(row[5]), 'id': row[1], 'strand': row[3], From 98e12a9e4cf08c6ccc0183d0b1309e11290490a4 Mon Sep 17 00:00:00 2001 From: Christine Lo Date: Wed, 19 Aug 2015 12:35:33 -0700 Subject: [PATCH 4/6] add exon frames to read_refgene transcript json --- pyhgvs/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py index ac7cdd6..8d132ad 100644 --- a/pyhgvs/utils.py +++ b/pyhgvs/utils.py @@ -47,7 +47,8 @@ def read_refgene(infile): # Skip trailing , exon_starts = map(int, row[9].split(',')[:-1]) exon_ends = map(int, row[10].split(',')[:-1]) - exons = zip(exon_starts, exon_ends) + exon_frames = map(int, row[15].split(','[:-1])) + exons = zip(exon_starts, exon_ends, exon_frames) yield { 'chrom': row[2], @@ -92,7 +93,7 @@ def make_transcript(transcript_json): if not transcript.tx_position.is_forward_strand: exons = reversed(exons) - for exon_number, (exon_start, exon_end) in enumerate(exons, 1): + for exon_number, (exon_start, exon_end, exon_frame) in enumerate(exons, 1): transcript.exons.append( Exon(transcript=transcript, tx_position=Position( From b0b16d067f9bf3fb9ada4e8cd11e1fb2dcc6de7b Mon Sep 17 00:00:00 2001 From: Christine Lo Date: Wed, 19 Aug 2015 12:56:28 -0700 Subject: [PATCH 5/6] transcript start is 0-based --- pyhgvs/__init__.py | 4 ++-- pyhgvs/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyhgvs/__init__.py b/pyhgvs/__init__.py index c9b2c1b..148cb3b 100644 --- a/pyhgvs/__init__.py +++ b/pyhgvs/__init__.py @@ -534,7 +534,7 @@ def cdna_to_genomic_coord(transcript, coord): # 5' flanking sequence. if pos < 1: if transcript_strand: - return transcript.tx_position.chrom_start + pos - 1 + return transcript.tx_position.chrom_start + pos else: return transcript.tx_position.chrom_stop - pos + 1 @@ -1358,7 +1358,7 @@ def parse_hgvs_name(hgvs_name, genome, transcript=None, if transcript and hgvs.transcript in genome: # Reference sequence is directly known, use it. genome = GenomeSubset(genome, transcript.tx_position.chrom, - transcript.tx_position.chrom_start - 1, + transcript.tx_position.chrom_start, transcript.tx_position.chrom_stop, hgvs.transcript) diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py index 8d132ad..d8eabe0 100644 --- a/pyhgvs/utils.py +++ b/pyhgvs/utils.py @@ -47,7 +47,7 @@ def read_refgene(infile): # Skip trailing , exon_starts = map(int, row[9].split(',')[:-1]) exon_ends = map(int, row[10].split(',')[:-1]) - exon_frames = map(int, row[15].split(','[:-1])) + exon_frames = map(int, row[15].split(',')[:-1]) exons = zip(exon_starts, exon_ends, exon_frames) yield { From 3e44f32dc787cfa58f2f89d1579d5eb4ede2b63e Mon Sep 17 00:00:00 2001 From: Christine Lo Date: Thu, 20 Aug 2015 13:21:07 -0700 Subject: [PATCH 6/6] add new field exon_frames to json --- pyhgvs/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py index d8eabe0..4b715f3 100644 --- a/pyhgvs/utils.py +++ b/pyhgvs/utils.py @@ -48,7 +48,7 @@ def read_refgene(infile): exon_starts = map(int, row[9].split(',')[:-1]) exon_ends = map(int, row[10].split(',')[:-1]) exon_frames = map(int, row[15].split(',')[:-1]) - exons = zip(exon_starts, exon_ends, exon_frames) + exons = zip(exon_starts, exon_ends) yield { 'chrom': row[2], @@ -60,6 +60,7 @@ def read_refgene(infile): 'cds_end': int(row[7]), 'gene_name': row[12], 'exons': exons, + 'exon_frames': exon_frames } @@ -93,7 +94,7 @@ def make_transcript(transcript_json): if not transcript.tx_position.is_forward_strand: exons = reversed(exons) - for exon_number, (exon_start, exon_end, exon_frame) in enumerate(exons, 1): + for exon_number, (exon_start, exon_end) in enumerate(exons, 1): transcript.exons.append( Exon(transcript=transcript, tx_position=Position(