Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into czhu-add-plot-summary
Browse files Browse the repository at this point in the history
  • Loading branch information
zhuchcn committed May 17, 2023
2 parents 0df7ae7 + 9462827 commit 9250b54
Show file tree
Hide file tree
Showing 10 changed files with 382 additions and 292 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

- Fixed `callVariant` issue where an altSplice insertion carries an intronic indel that goes back to the original reading frame. #726

- Fixed `callVairant` to handle deletion that spans over an entire intron. #732
- Fixed `callVariant` to handle deletion that spans over an entire intron. #732

- Fixed `callVariant` to skip peptides that are either too long or too short at an earlier stage, which significantly improves efficiency. #736

- Fixed `callVariant` to handle hypermutated regions with a dynamic cutoff. #738

## [0.11.5] - 2023-3-5

Expand Down
3 changes: 2 additions & 1 deletion moPepGen/cli/call_noncoding_peptide.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ def call_noncoding_peptide_main(tx_id:str, tx_model:TranscriptAnnotationModel,
check_orf=True,
denylist=canonical_peptides,
orf_assignment=orf_assignment,
w2f=w2f_reassignment
w2f=w2f_reassignment,
check_external_variants=False
)
orfs = get_orf_sequences(pgraph, tx_id, tx_model.gene_id, tx_seq)
return peptides, orfs
Expand Down
5 changes: 3 additions & 2 deletions moPepGen/svgraph/PVGNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,8 +305,9 @@ def get_cleavage_gain_variants(self) -> List[seqvar.VariantRecord]:
def get_cleavage_gain_from_downstream(self) -> List[seqvar.VariantRecord]:
""" Get the variants that gains the cleavage by downstream nodes """
cleavage_gain = []
seq_len = len(self.seq.seq)
upstream_cleave_alts = [v.variant for v in self.variants
if v.location.end == len(self.seq.seq)]
if v.location.end == seq_len]
for node in self.out_nodes:
if not node.variants:
return []
Expand Down Expand Up @@ -744,7 +745,7 @@ def get_downstream_stop_altering_variants(self) -> Set[VariantRecord]:
""" Get downstream stop altering variants """
final_variants = set()
for out_node in self.out_nodes:
if out_node.seq.seq == '*':
if len(out_node.seq.seq) == 1 and out_node.seq.seq.startswith('*'):
stop_alts = set()
stop_alts.update([x.variant for x in out_node.variants
if x.is_stop_altering and not x.downstream_cleavage_altering])
Expand Down
5 changes: 4 additions & 1 deletion moPepGen/svgraph/PeptideVariantGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,8 @@ def call_variant_peptides(self, check_variants:bool=True,
gene_id=self.gene_id,
truncate_sec=truncate_sec,
w2f=w2f,
check_external_variants=check_external_variants
check_external_variants=check_external_variants,
cleavage_params=self.cleavage_params
)
traversal = PVGTraversal(
check_variants=check_variants, check_orf=check_orf,
Expand Down Expand Up @@ -855,6 +856,8 @@ def call_variant_peptides(self, check_variants:bool=True,
if check_orf:
self.create_orf_id_map()

peptide_pool.translational_modification(w2f, self.denylist)

return peptide_pool.get_peptide_sequences(
keep_all_occurrence=keep_all_occurrence, orf_id_map=self.orf_id_map
)
Expand Down
50 changes: 39 additions & 11 deletions moPepGen/svgraph/ThreeFrameTVG.py
Original file line number Diff line number Diff line change
Expand Up @@ -1473,15 +1473,38 @@ def first_node_is_smaller(self, first:TVGNode, second:TVGNode) -> bool:

return subgraph1.location < subgraph2.location

def nodes_have_too_many_variants(self, nodes:Iterable[TVGNode]) -> bool:
@staticmethod
def nodes_have_too_many_variants(nodes:Iterable[TVGNode],
max_in_bubble_variants:int) -> bool:
""" Check the total number of variants of given nodes """
if self.cleavage_params.max_variants_per_node == -1:
if max_in_bubble_variants == -1:
return False
variants = set()
for node in nodes:
for variant in node.variants:
variants.add(variant.variant)
return len(variants) > self.cleavage_params.max_variants_per_node
return len(variants) > max_in_bubble_variants

@staticmethod
def get_max_in_bubble_variants(n:int) -> int:
    """ Map the total number of variants found in a variant bubble to the
    `max_in_bubble_variants` cutoff to use for that bubble.

    The cutoff shrinks as the bubble grows; the tiers are chosen so the
    number of combinations to consider won't be too much more than 5,000.

    Args:
        n (int): Total number of variants in the variant bubble.

    Returns:
        The cutoff for the bubble. -1 is returned for bubbles with 12 or
        fewer variants, and 1 for bubbles with more than 100 variants.
    """
    # Each tier is (largest bubble size covered, cutoff), in ascending order
    # of bubble size so the first tier that fits is the one to use.
    tiers = (
        (12, -1),
        (14, 7),
        (15, 6),
        (16, 5),
        (21, 4),
        (34, 3),
        (100, 2),
    )
    for upper_bound, cutoff in tiers:
        if n <= upper_bound:
            return cutoff
    # Anything larger than the last tier gets the most restrictive cutoff.
    return 1

def align_variants(self, node:TVGNode) -> Tuple[TVGNode, TVGNode]:
r""" Aligns all variants at that overlaps to the same start and end
Expand Down Expand Up @@ -1510,6 +1533,18 @@ def align_variants(self, node:TVGNode) -> Tuple[TVGNode, TVGNode]:
raise ValueError('reading_frame_index not found')
end_node, members = self.find_variant_bubble(node)

member_variants = set()
for member in members:
member_variants.update([v.variant.id for v in member.variants])

max_in_bubble_variants = self.get_max_in_bubble_variants(len(member_variants))

if len(member_variants) >= 13:
err.warning(
f"Hypermutated region detected with {len(member_variants)} variants."
f" `max_in_bubble_variants` of {max_in_bubble_variants} is used."
)

if not end_node:
raise err.FailedToFindVariantBubbleError()

Expand Down Expand Up @@ -1601,14 +1636,7 @@ def align_variants(self, node:TVGNode) -> Tuple[TVGNode, TVGNode]:

trash.add(out_node)

if self.nodes_have_too_many_variants([cur, out_node]):
if not self.hypermutated_region_warned:
err.HypermutatedRegionWarning(
self.id,
self.cleavage_params.max_variants_per_node,
self.cleavage_params.additional_variants_per_misc
)
self.hypermutated_region_warned = True
if self.nodes_have_too_many_variants([cur, out_node], max_in_bubble_variants):
continue

# create new node with the combined sequence
Expand Down
Loading

0 comments on commit 9250b54

Please sign in to comment.