Skip to content

Commit

Permalink
Merge pull request #856 from uclahs-cds/czhu-fix-call-variant
Browse files Browse the repository at this point in the history
Peptide Table saved by callVariant
  • Loading branch information
zhuchcn authored Mar 7, 2024
2 parents f78ca08 + 4e6f79d commit 73fa63d
Show file tree
Hide file tree
Showing 19 changed files with 583 additions and 191 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

- Added `updateIndex` #853

- Output a peptide table for additional information. #833

## [1.2.1] - 2023-10-05

### Add
Expand Down
12 changes: 12 additions & 0 deletions moPepGen/SeqFeature.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@ def is_superset(self, other:FeatureLocation) -> bool:
""" Find whether the location is a superset of the other """
return self.start <= other.start and self.end >= other.end

def shift(self, i:int) -> FeatureLocation:
""" shift """
return FeatureLocation(
seqname=self.seqname,
start=self.start + i,
end=self.end + i,
reading_frame_index=self.reading_frame_index,
start_offset=self.start_offset,
end_offset=self.end_offset,
strand=self.strand
)


class SeqFeature(BioSeqFeature):
""" Models the annotation of a given range of the sequence. This extends
Expand Down
13 changes: 7 additions & 6 deletions moPepGen/aa/VariantPeptidePool.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, peptides:Set[AminoAcidSeqRecord]=None):

def add_peptide(self, peptide:AminoAcidSeqRecord,
canonical_peptides:Set[str], cleavage_params:params.CleavageParams=None,
skip_checking:bool=False):
skip_checking:bool=False) -> bool:
""" Add a peptide to the pool if it does not already exist. Otherwise,
the label is appended to the existing same peptide.
Expand All @@ -36,12 +36,12 @@ def add_peptide(self, peptide:AminoAcidSeqRecord,
min_mw = cleavage_params.min_mw
min_length = cleavage_params.min_length
max_length = cleavage_params.max_length
if SeqUtils.molecular_weight(peptide.seq, 'protein') < min_mw:
return
if len(peptide.seq) < min_length or len(peptide.seq) > max_length:
return
if SeqUtils.molecular_weight(peptide, 'protein') < min_mw:
return False
if len(peptide.seq) < min_length or len(peptide) > max_length:
return False
if str(peptide.seq) in canonical_peptides:
return
return False
same_peptide = get_equivalent(self.peptides, peptide)
if same_peptide:
same_peptide:Seq
Expand All @@ -51,6 +51,7 @@ def add_peptide(self, peptide:AminoAcidSeqRecord,
same_peptide.name = same_peptide.description
else:
self.peptides.add(peptide)
return True

def remove_redundant_headers(self):
""" remove redundant fasta header entries """
Expand Down
29 changes: 25 additions & 4 deletions moPepGen/cli/call_alt_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
harbor any alternative translation event. """
from __future__ import annotations
import argparse
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Dict, Set
from pathlib import Path
from moPepGen import params, svgraph, aa, get_logger
from moPepGen import params, svgraph, aa, get_logger, VARIANT_PEPTIDE_SOURCE_DELIMITER
from moPepGen.cli import common


if TYPE_CHECKING:
from Bio.Seq import Seq
from moPepGen.gtf import TranscriptAnnotationModel, GenomicAnnotation
from moPepGen.dna import DNASeqDict

Expand Down Expand Up @@ -125,15 +126,35 @@ def call_alt_translation_main(tx_id:str, tx_model:TranscriptAnnotationModel,
cds_start_nf=tx_model.is_cds_start_nf(),
has_known_orf=True,
mrna_end_nf=tx_model.is_mrna_end_nf(),
cleavage_params=cleavage_params
cleavage_params=cleavage_params,
coordinate_feature_type='transcript',
coordinate_feature_id=tx_id
)
dgraph.gather_sect_variants(anno)
dgraph.init_three_frames()
pgraph = dgraph.translate()
pgraph.create_cleavage_graph()
return pgraph.call_variant_peptides(
peptide_anno = pgraph.call_variant_peptides(
check_variants=True,
truncate_sec=sec_truncation,
w2f=w2f_reassignment,
check_external_variants=False
)
peptide_map:Dict[Seq, Set[str]] = {}
for seq, annotated_labels in peptide_anno.items():
for label in annotated_labels:
if seq in peptide_map:
peptide_map[seq].add(label.label)
else:
peptide_map[seq] = {label.label}
peptides = set()
for seq, labels in peptide_map.items():
label = VARIANT_PEPTIDE_SOURCE_DELIMITER.join(labels)
peptides.add(
aa.AminoAcidSeqRecord(
seq=seq,
description=label,
name=label
)
)
return peptides
28 changes: 24 additions & 4 deletions moPepGen/cli/call_noncoding_peptide.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@
It finds all start codons of any noncoding gene. """
from __future__ import annotations
import argparse
from typing import TYPE_CHECKING, Set, List, Tuple, IO
from typing import TYPE_CHECKING, Set, List, Tuple, IO, Dict
from pathlib import Path
from Bio.SeqIO import FastaIO
from moPepGen import params, svgraph, aa, get_logger
from moPepGen import params, svgraph, aa, get_logger, VARIANT_PEPTIDE_SOURCE_DELIMITER
from moPepGen.dna.DNASeqRecord import DNASeqRecordWithCoordinates
from moPepGen.err import ReferenceSeqnameNotFoundError, warning
from moPepGen.cli import common


if TYPE_CHECKING:
from Bio.Seq import Seq
from moPepGen.gtf import TranscriptAnnotationModel
from moPepGen.dna import DNASeqDict

Expand Down Expand Up @@ -190,19 +191,38 @@ def call_noncoding_peptide_main(tx_id:str, tx_model:TranscriptAnnotationModel,
cds_start_nf=True,
has_known_orf=False,
cleavage_params=cleavage_params,
gene_id=tx_model.gene_id
gene_id=tx_model.gene_id,
coordinate_feature_type='transcript',
coordinate_feature_id=tx_id
)
dgraph.init_three_frames()
pgraph = dgraph.translate()
pgraph.create_cleavage_graph()
peptides = pgraph.call_variant_peptides(
peptide_anno = pgraph.call_variant_peptides(
check_variants=False,
check_orf=True,
denylist=canonical_peptides,
orf_assignment=orf_assignment,
w2f=w2f_reassignment,
check_external_variants=False
)
peptide_map:Dict[Seq, Set[str]] = {}
for seq, annotated_labels in peptide_anno.items():
for label in annotated_labels:
if seq in peptide_map:
peptide_map[seq].add(label.label)
else:
peptide_map[seq] = {label.label}
peptides = set()
for seq, labels in peptide_map.items():
label = VARIANT_PEPTIDE_SOURCE_DELIMITER.join(labels)
peptides.add(
aa.AminoAcidSeqRecord(
seq=seq,
description=label,
name=label
)
)
orfs = get_orf_sequences(pgraph, tx_id, tx_model.gene_id, tx_seq)
return peptides, orfs

Expand Down
Loading

0 comments on commit 73fa63d

Please sign in to comment.