From 1c3d4a606190a79544fe23864435bc795734895c Mon Sep 17 00:00:00 2001
From: Christine Lo <clo@counsyl.com>
Date: Wed, 19 Aug 2015 08:07:21 -0700
Subject: [PATCH 1/6] validate number of columns in refgene file

---
 pyhgvs/utils.py | 6 ++++++
 setup.py        | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py
index b79e6a4..a2b5b28 100644
--- a/pyhgvs/utils.py
+++ b/pyhgvs/utils.py
@@ -4,6 +4,8 @@
 
 from itertools import imap
 
+from django.core.exceptions import ValidationError
+
 from .models import Exon
 from .models import Position
 from .models import Transcript
@@ -39,6 +41,10 @@ def read_refgene(infile):
         if line.startswith('#'):
             continue
         row = line.rstrip('\n').split('\t')
+        if len(row) != 16:
+            raise ValidationError(
+                'File has incorrect number of columns '
+                'in at least one line.', code='invalid')
 
         # Skip trailing ,
         exon_starts = map(int, row[9].split(',')[:-1])
diff --git a/setup.py b/setup.py
index 7d7d01b..7bb2e0d 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@ def main():
 
     setup(
         name='pyhgvs',
-        version='0.9.5',
+        version='0.9.6',
         description='HGVS name parsing and formatting',
         long_description=description,
         author='Matt Rasmussen',

From d26d44aff1e3810698bb5b718d9ffd3a2e2ac5f9 Mon Sep 17 00:00:00 2001
From: Christine Lo <clo@counsyl.com>
Date: Wed, 19 Aug 2015 09:38:35 -0700
Subject: [PATCH 2/6] use ValueError

---
 pyhgvs/utils.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py
index a2b5b28..b6f6c14 100644
--- a/pyhgvs/utils.py
+++ b/pyhgvs/utils.py
@@ -4,8 +4,6 @@
 
 from itertools import imap
 
-from django.core.exceptions import ValidationError
-
 from .models import Exon
 from .models import Position
 from .models import Transcript
@@ -42,7 +40,7 @@ def read_refgene(infile):
             continue
         row = line.rstrip('\n').split('\t')
         if len(row) != 16:
-            raise ValidationError(
+            raise ValueError(
                 'File has incorrect number of columns '
-                'in at least one line.', code='invalid')
+                'in at least one line.')
 

From 08f43857b1f491434473ed8c75b418a70388ac89 Mon Sep 17 00:00:00 2001
From: Christine Lo <clo@counsyl.com>
Date: Wed, 19 Aug 2015 09:39:45 -0700
Subject: [PATCH 3/6] keep start 0-based

---
 pyhgvs/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py
index b6f6c14..ac7cdd6 100644
--- a/pyhgvs/utils.py
+++ b/pyhgvs/utils.py
@@ -51,7 +51,7 @@ def read_refgene(infile):
 
         yield {
             'chrom': row[2],
-            'start': int(row[4]) + 1,
+            'start': int(row[4]),
             'end': int(row[5]),
             'id': row[1],
             'strand': row[3],

From 98e12a9e4cf08c6ccc0183d0b1309e11290490a4 Mon Sep 17 00:00:00 2001
From: Christine Lo <clo@counsyl.com>
Date: Wed, 19 Aug 2015 12:35:33 -0700
Subject: [PATCH 4/6] add exon frames to read_refgene transcript json

---
 pyhgvs/utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py
index ac7cdd6..8d132ad 100644
--- a/pyhgvs/utils.py
+++ b/pyhgvs/utils.py
@@ -47,7 +47,8 @@ def read_refgene(infile):
         # Skip trailing ,
         exon_starts = map(int, row[9].split(',')[:-1])
         exon_ends = map(int, row[10].split(',')[:-1])
-        exons = zip(exon_starts, exon_ends)
+        exon_frames = map(int, row[15].split(','[:-1]))
+        exons = zip(exon_starts, exon_ends, exon_frames)
 
         yield {
             'chrom': row[2],
@@ -92,7 +93,7 @@ def make_transcript(transcript_json):
     if not transcript.tx_position.is_forward_strand:
         exons = reversed(exons)
 
-    for exon_number, (exon_start, exon_end) in enumerate(exons, 1):
+    for exon_number, (exon_start, exon_end, exon_frame) in enumerate(exons, 1):
         transcript.exons.append(
             Exon(transcript=transcript,
                  tx_position=Position(

From b0b16d067f9bf3fb9ada4e8cd11e1fb2dcc6de7b Mon Sep 17 00:00:00 2001
From: Christine Lo <clo@counsyl.com>
Date: Wed, 19 Aug 2015 12:56:28 -0700
Subject: [PATCH 5/6] transcript start is 0-based

---
 pyhgvs/__init__.py | 4 ++--
 pyhgvs/utils.py    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyhgvs/__init__.py b/pyhgvs/__init__.py
index c9b2c1b..148cb3b 100644
--- a/pyhgvs/__init__.py
+++ b/pyhgvs/__init__.py
@@ -534,7 +534,7 @@ def cdna_to_genomic_coord(transcript, coord):
     # 5' flanking sequence.
     if pos < 1:
         if transcript_strand:
-            return transcript.tx_position.chrom_start + pos - 1
+            return transcript.tx_position.chrom_start + pos
         else:
             return transcript.tx_position.chrom_stop - pos + 1
 
@@ -1358,7 +1358,7 @@ def parse_hgvs_name(hgvs_name, genome, transcript=None,
     if transcript and hgvs.transcript in genome:
         # Reference sequence is directly known, use it.
         genome = GenomeSubset(genome, transcript.tx_position.chrom,
-                              transcript.tx_position.chrom_start - 1,
+                              transcript.tx_position.chrom_start,
                               transcript.tx_position.chrom_stop,
                               hgvs.transcript)
 
diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py
index 8d132ad..d8eabe0 100644
--- a/pyhgvs/utils.py
+++ b/pyhgvs/utils.py
@@ -47,7 +47,7 @@ def read_refgene(infile):
         # Skip trailing ,
         exon_starts = map(int, row[9].split(',')[:-1])
         exon_ends = map(int, row[10].split(',')[:-1])
-        exon_frames = map(int, row[15].split(','[:-1]))
+        exon_frames = map(int, row[15].split(',')[:-1])
         exons = zip(exon_starts, exon_ends, exon_frames)
 
         yield {

From 3e44f32dc787cfa58f2f89d1579d5eb4ede2b63e Mon Sep 17 00:00:00 2001
From: Christine Lo <clo@counsyl.com>
Date: Thu, 20 Aug 2015 13:21:07 -0700
Subject: [PATCH 6/6] add new field exon_frames to json

---
 pyhgvs/utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyhgvs/utils.py b/pyhgvs/utils.py
index d8eabe0..4b715f3 100644
--- a/pyhgvs/utils.py
+++ b/pyhgvs/utils.py
@@ -48,7 +48,7 @@ def read_refgene(infile):
         exon_starts = map(int, row[9].split(',')[:-1])
         exon_ends = map(int, row[10].split(',')[:-1])
         exon_frames = map(int, row[15].split(',')[:-1])
-        exons = zip(exon_starts, exon_ends, exon_frames)
+        exons = zip(exon_starts, exon_ends)
 
         yield {
             'chrom': row[2],
@@ -60,6 +60,7 @@ def read_refgene(infile):
             'cds_end': int(row[7]),
             'gene_name': row[12],
             'exons': exons,
+            'exon_frames': exon_frames
         }
 
 
@@ -93,7 +94,7 @@ def make_transcript(transcript_json):
     if not transcript.tx_position.is_forward_strand:
         exons = reversed(exons)
 
-    for exon_number, (exon_start, exon_end, exon_frame) in enumerate(exons, 1):
+    for exon_number, (exon_start, exon_end) in enumerate(exons, 1):
         transcript.exons.append(
             Exon(transcript=transcript,
                  tx_position=Position(