Skip to content

Commit

Permalink
Merge pull request #451 from uclahs-cds/czhu-fix-vep-parser
Browse files Browse the repository at this point in the history
Skip MNVs in parseVEP
  • Loading branch information
lydiayliu authored May 12, 2022
2 parents ce472b3 + 70d3098 commit 1c6c03d
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 15 deletions.
10 changes: 6 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Changelog

All notable changes to moPepGen are documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
Expand All @@ -9,7 +10,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## [Unreleased]

## [0.4.2] - 2002-05-11
## [0.4.2] - 2022-05-11

### Changed

- `parseVEP` now raises a warning and skips the record when trying to parse an MNV (multi-nucleotide variant), instead of raising an error. #447

- Fixed the issue in `summarizeFasta` that the source order of `Noncoding` was not recognized. #449

Expand All @@ -25,17 +30,14 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

- Fixed the issue that in `splitFasta`, variant sources are not grouped as they are specified by `--group-source` #439


### Added

- Resource usage, including memory, CPU, and time, is now printed to stdout at the end of all command line programs.


### Fixed

- Fixed the issue that `--additional-split` was not recognized properly in `splitFasta`. #443


---

## [0.4.0] - 2022-03-17
Expand Down
2 changes: 1 addition & 1 deletion moPepGen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Iterable, IO


__version__ = '0.4.1'
__version__ = '0.4.2'

## Error messages
ERROR_INDEX_IN_INTRON = 'The genomic index seems to be in an intron'
Expand Down
7 changes: 6 additions & 1 deletion moPepGen/cli/parse_vep.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing import Dict, List
from pathlib import Path
from moPepGen.parser import VEPParser
from moPepGen.err import TranscriptionStopSiteMutationError, \
from moPepGen.err import MNVParsingError, TranscriptionStopSiteMutationError, \
TranscriptionStartSiteMutationError, warning
from moPepGen import seqvar, logger
from moPepGen.cli import common
Expand Down Expand Up @@ -74,6 +74,11 @@ def parse_vep(args:argparse.Namespace) -> None:
continue
except TranscriptionStartSiteMutationError:
continue
except MNVParsingError:
warning(
f"MNVs are not currently supported. Skipping record: {record}"
)
continue

vep_records[transcript_id].append(record)

Expand Down
7 changes: 7 additions & 0 deletions moPepGen/err.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ def __init__(self, gene_id:str):
msg = f"Gene {gene_id} not found."
super().__init__(msg)

class MNVParsingError(Exception):
    """ Raised when the record being parsed is a MNV (multi-nucleotide
    variant), which is not supported at the moment. """
    def __init__(self):
        """ Initialize the exception with its fixed, argument-free message. """
        super().__init__(
            "Trying to parse a MNV, which is currently unsupported."
        )

def warning(msg:str) -> None:
""" print a warning message """
logger(f"[ !!! moPepGen WARNING !!! ] {msg}")
Expand Down
10 changes: 3 additions & 7 deletions moPepGen/parser/VEPParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from Bio.Seq import Seq
from moPepGen.SeqFeature import FeatureLocation
from moPepGen.err import TranscriptionStopSiteMutationError, \
TranscriptionStartSiteMutationError
TranscriptionStartSiteMutationError, MNVParsingError
from moPepGen import seqvar, dna, gtf


Expand Down Expand Up @@ -174,9 +174,7 @@ def convert_to_variant_record(self, anno:gtf.GenomicAnnotation,
allele = str(Seq(allele).reverse_complement())
if alt_end - alt_start == 1:
if len(allele) > 1:
raise ValueError(
f'Could not recognize the VEP record. Transcript: [{self.feature}]'
)
raise MNVParsingError()
ref = str(seq.seq[alt_start])
alt = allele
elif alt_end - alt_start == 2:
Expand All @@ -185,9 +183,7 @@ def convert_to_variant_record(self, anno:gtf.GenomicAnnotation,
alt = ref + allele
else:
if len(allele) > 1:
raise ValueError(
f'Could not recognize the VEP record. Transcript: [{self.feature}]'
)
raise MNVParsingError()
ref = str(seq.seq[alt_start:alt_end])
alt = allele

Expand Down
3 changes: 2 additions & 1 deletion test/files/vep/vep_indel.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
rs59404993 chr22:399-400 C ENSG00000128408.9 ENST00000614167.2 Transcript frameshift_variant 399-400 205-206 69 T/TX acc/aCcc - IMPACT=HIGH;STRAND=1;SOURCE=GENCODEv34
rs59404993 chr22:4981-4983 - ENSG00000099949.21 ENST00000642151.1 Transcript start_lost,inframe_deletion 1-3 1-3 1 M/- ATG/- - IMPACT=HIGH;STRAND=1;SOURCE=GENCODEv34
rs59404993 chr22:399-401 AAA ENSG00000128408.9 ENST00000614167.2 Transcript frameshift_variant 399-400 205-206 69 T/TX acc/aCcc - IMPACT=HIGH;STRAND=1;SOURCE=GENCODEv34
rs59404993 chr22:4981-4983 - ENSG00000099949.21 ENST00000642151.1 Transcript start_lost,inframe_deletion 1-3 1-3 1 M/- ATG/- - IMPACT=HIGH;STRAND=1;SOURCE=GENCODEv34
26 changes: 25 additions & 1 deletion test/unit/test_vep_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from test.unit import create_genomic_annotation, create_dna_record_dict
from moPepGen.parser import VEPParser
from moPepGen.err import TranscriptionStopSiteMutationError, \
TranscriptionStartSiteMutationError
TranscriptionStartSiteMutationError, MNVParsingError


GENOME_DATA = {
Expand Down Expand Up @@ -479,5 +479,29 @@ def test_vep_to_variant_record_case15_deletion(self):
self.assertEqual(record.ref, 'CTAT')
self.assertEqual(record.alt, 'T')

def test_vep_to_variant_mnv_error(self):
    """ Converting a VEP record with a multi-base allele ('AAA') must raise
    MNVParsingError. """
    genome = create_dna_record_dict(GENOME_DATA)
    anno = create_genomic_annotation(ANNOTATION_DATA)

    # Field values of the record under test, passed to VEPRecord as keywords.
    mnv_fields = {
        'uploaded_variation': 'rs55971985',
        'location': 'chr1:19-22',
        'allele': 'AAA',
        'gene': 'ENSG0001',
        'feature': 'ENST0001.1',
        'feature_type': 'Transcript',
        'consequences': ['missense_variant'],
        'cdna_position': '11',
        'cds_position': '11',
        'protein_position': 3,
        'amino_acids': ('S', 'T'),
        'codons': ('CCT', 'AAA'),
        'existing_variation': '-',
        'extra': {},
    }
    mnv_record = VEPParser.VEPRecord(**mnv_fields)

    self.assertRaises(
        MNVParsingError,
        mnv_record.convert_to_variant_record,
        anno,
        genome,
    )

if __name__ == '__main__':
unittest.main()

0 comments on commit 1c6c03d

Please sign in to comment.