Skip to content

Commit

Permalink
Merge pull request #18 from dev/right_normalize_variant
Browse files Browse the repository at this point in the history
Right normalize variant
  • Loading branch information
Christine Lo committed Apr 16, 2015
2 parents aebe5bd + dae151e commit def8ab6
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 19 deletions.
6 changes: 6 additions & 0 deletions pyhgvs/tests/data/test_variants.genome
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,9 @@ chr17 41246218 41246248 TTTACATATTAAAGCCTCATGAGGATCACT
chr17 41246248 41246278 GGCCAGTAAGTCTATTTTCTCTGAAGAACC
chr1 0 41 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
chr1 1 31 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
chr17 78078913 78078958 AACCGCCTCCACTTCACGGTGGGCAGGGCAGGGGCGGGGGCGGCG
chr17 78078916 78078946 CGCCTCCACTTCACGGTGGGCAGGGCAGGG
chr17 78078946 78078976 GCGGGGGCGGCGGCCAGGGCAGAGGGTGCG
chr7 117199624 117199667 CACCATTAAAGAAAATATCATCTTTGGTGTTTCCTATGATGAA
chr7 117199615 117199645 TATGCCTGGCACCATTAAAGAAAATATCAT
chr7 117199648 117199678 TGGTGTTTCCTATGATGAATATAGATACAG
35 changes: 26 additions & 9 deletions pyhgvs/tests/test_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,48 @@
_normalize_tests = [
# Simple SNP.
(('chr17', 41246250, 'G', ['C']),
('chr17', 41246250, 'G', ['C'])),
('chr17', 41246250, 'G', ['C']),
'left'),

# Left-align and 1bp pad.
(('chr17', 41246251, '', ['G']),
('chr17', 41246248, 'T', ['TG'])),
('chr17', 41246248, 'T', ['TG']),
'left'),

# Trim common prefix, left-align, and 1bp pad.
(('chr17', 41246250, 'G', ['GG']),
('chr17', 41246248, 'T', ['TG'])),
('chr17', 41246248, 'T', ['TG']),
'left'),

# Trim common prefix.
(('chr17', 41246248, 'TGGC', ['TGGA']),
('chr17', 41246251, 'C', ['A'])),
('chr17', 41246251, 'C', ['A']),
'left'),

# Trim common prefix and suffix.
(('chr17', 41246248, 'TGGC', ['TGAC']),
('chr17', 41246250, 'G', ['A'])),
('chr17', 41246250, 'G', ['A']),
'left'),

# Trim common prefix, triallelic
(('chr17', 41246248, 'TGGC', ['TGGA', 'TGAC']),
('chr17', 41246249, 'GGC', ['GAC', 'GGA'])),
('chr17', 41246249, 'GGC', ['GAC', 'GGA']),
'left'),

# At the left edge of the chromosome: left-justify, then pad on the right.
(('chr1', 5, 'NN', ['N']),
('chr1', 1, 'NN', ['N'])),
('chr1', 1, 'NN', ['N']),
'left'),

# Insertion. Trim common prefix, right-align, and 1bp pad.
(('chr17', 78078933, 'T', ['TGGGCA']),
('chr17', 78078946, 'G', ['GCAGGG']),
'right'),

# Deletion. Trim common prefix, right-align, and 1bp pad.
(('chr7', 117199644, 'ATCT', ['A']),
('chr7', 117199645, 'TCTT', ['T']),
'right')
]


Expand All @@ -55,10 +72,10 @@ def test_normalize_variant(self):
filename='pyhgvs/tests/data/test_variants.genome',
create_data=False)

for variant, true_variant in _normalize_tests:
for variant, true_variant, justify in _normalize_tests:
chrom, offset, ref, alts = variant
norm_variant = normalize_variant(
chrom, offset, ref, alts, genome).variant
chrom, offset, ref, alts, genome, justify=justify).variant
self.assertEqual(
norm_variant, true_variant,
'Variant failed to normalize %s: %s != %s' %
Expand Down
22 changes: 13 additions & 9 deletions pyhgvs/variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def justify_genomic_indel(genome, chrom, start, end, indel, justify,


def normalize_variant(chrom, offset, ref_sequence, alt_sequences, genome,
flank_length=30):
justify='left', flank_length=30):
"""
Normalize variant according to the GATK/VCF standard.
Expand All @@ -131,7 +131,7 @@ def normalize_variant(chrom, offset, ref_sequence, alt_sequences, genome,
chrom_stop=end,
is_forward_strand=True)
return NormalizedVariant(position, ref_sequence, alt_sequences,
genome=genome)
genome=genome, justify=justify)


class NormalizedVariant(object):
Expand All @@ -140,7 +140,7 @@ class NormalizedVariant(object):
"""

def __init__(self, position, ref_allele, alt_alleles,
seq_5p='', seq_3p='', genome=None):
seq_5p='', seq_3p='', genome=None, justify='left'):
"""
position: a 0-index genomic Position.
ref_allele: the reference allele sequence.
Expand All @@ -159,7 +159,7 @@ def __init__(self, position, ref_allele, alt_alleles,
self._on_forward_strand()
self._trim_common_prefix()
self._trim_common_suffix()
self._left_align()
self._align(justify)
self._1bp_pad()
self._set_1based_position()

Expand Down Expand Up @@ -215,11 +215,11 @@ def _trim_common_suffix(self):
for i, allele in enumerate(self.alleles):
self.alleles[i] = allele[:-common_suffix]

def _left_align(self):
def _align(self, justify):
"""
Align variant as far to the left as possible.
Align variant as far to the left or right as possible.
"""
# Left-aligning only makes sense for INDELs.
# Aligning only makes sense for INDELs.
if self.molecular_class != "INDEL":
return

Expand All @@ -236,7 +236,11 @@ def _left_align(self):
start, end, allele = justify_genomic_indel(
self.genome, self.position.chrom,
self.position.chrom_start, self.position.chrom_stop,
allele, 'left')
allele, justify)
# When right-aligning an insertion, shift start and end past the inserted
# sequence so that the insertion is placed at the end.
if justify == 'right' and i != 0:
start += len(allele)
end += len(allele)
self.position.chrom_start = start
self.position.chrom_stop = end
flank_length = 30
Expand All @@ -248,7 +252,7 @@ def _left_align(self):
else:
offset = len(self.seq_5p)
offset2, _, allele = justify_indel(
offset, offset, allele, self.seq_5p, 'left')
offset, offset, allele, self.seq_5p, justify)
delta = offset - offset2
if delta > 0:
self.position.chrom_start -= delta
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def main():

setup(
name='pyhgvs',
version='0.9.4',
version='0.9.5',
description='HGVS name parsing and formatting',
long_description=description,
author='Matt Rasmussen',
Expand Down

0 comments on commit def8ab6

Please sign in to comment.