diff --git a/pyhgvs/__init__.py b/pyhgvs/__init__.py
index c9b2c1b..148cb3b 100644
--- a/pyhgvs/__init__.py
+++ b/pyhgvs/__init__.py
@@ -534,7 +534,7 @@ def cdna_to_genomic_coord(transcript, coord):
     # 5' flanking sequence.
     if pos < 1:
         if transcript_strand:
-            return transcript.tx_position.chrom_start + pos - 1
+            return transcript.tx_position.chrom_start + pos
         else:
             return transcript.tx_position.chrom_stop - pos + 1
 
@@ -1358,7 +1358,7 @@ def parse_hgvs_name(hgvs_name, genome, transcript=None,
     if transcript and hgvs.transcript in genome:
         # Reference sequence is directly known, use it.
         genome = GenomeSubset(genome, transcript.tx_position.chrom,
-                              transcript.tx_position.chrom_start - 1,
+                              transcript.tx_position.chrom_start,
                               transcript.tx_position.chrom_stop,
                               hgvs.transcript)
 
diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py
index b79e6a4..4b715f3 100644
--- a/pyhgvs/utils.py
+++ b/pyhgvs/utils.py
@@ -39,15 +39,21 @@ def read_refgene(infile):
         if line.startswith('#'):
             continue
         row = line.rstrip('\n').split('\t')
+        # row[15] (exon frames) is read below, so require all 16 columns.
+        if len(row) != 16:
+            raise ValueError(
+                'File has incorrect number of columns '
+                'in at least one line.')
 
         # Skip trailing ,
         exon_starts = map(int, row[9].split(',')[:-1])
         exon_ends = map(int, row[10].split(',')[:-1])
+        exon_frames = map(int, row[15].split(',')[:-1])
         exons = zip(exon_starts, exon_ends)
 
         yield {
             'chrom': row[2],
-            'start': int(row[4]) + 1,
+            'start': int(row[4]),
             'end': int(row[5]),
             'id': row[1],
             'strand': row[3],
@@ -55,6 +61,7 @@ def read_refgene(infile):
             'cds_end': int(row[7]),
             'gene_name': row[12],
             'exons': exons,
+            'exon_frames': exon_frames
         }
 
 
diff --git a/setup.py b/setup.py
index 7d7d01b..7bb2e0d 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@ def main():
 
     setup(
         name='pyhgvs',
-        version='0.9.5',
+        version='0.9.6',
         description='HGVS name parsing and formatting',
         long_description=description,
         author='Matt Rasmussen',