Skip to content

Commit

Permalink
Merge pull request #815 from uclahs-cds/czhu-fix-call-variant
Browse files Browse the repository at this point in the history
Added fuzz test log results
  • Loading branch information
zhuchcn authored Oct 4, 2023
2 parents ab47481 + f71344c commit 2c2022d
Show file tree
Hide file tree
Showing 20 changed files with 423 additions and 4 deletions.
3 changes: 3 additions & 0 deletions docs/files/fuzz_test_history.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@ v1.2.0 04fec22 2023-08-24 comprehensive 303743 8 0 0:00:00.592038 109.2637484121
v1.2.0 84afd2b 2023-09-16 snv 50275 0 0 0:00:00.143654 0.3497673129907365 0:00:57.634105 120.90536684941736
v1.2.0 84afd2b 2023-09-16 indel 49736 0 0 0:00:00.194596 0.39638610164904325 0:00:43.395730 103.01298511471816
v1.2.0 84afd2b 2023-09-16 comprehensive 95039 1 0 0:00:00.366488 1.1783745697633377 0:00:42.658952 186.4045027067147
v1.2.0 aca093e 2023-09-21 snv 96909 0 0 0:00:00.139143 0.3491648515251173 0:00:56.182404 117.08552498241643
v1.2.0 aca093e 2023-09-21 indel 96810 1 0 0:00:00.184495 0.3863134944517933 0:00:42.898299 101.18628104172282
v1.2.0 aca093e 2023-09-21 comprehensive 184432 1 0 0:00:00.348405 1.4592854473001466 0:00:41.582605 178.84322477289754
21 changes: 19 additions & 2 deletions moPepGen/svgraph/PVGNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,11 @@ def get_in_nodes(self) -> List[PVGNode]:
""" Get incoming nodes as a list """
return list(self.in_nodes)

def has_exclusive_outbond_node(self) -> bool:
    """ Whether this node has exactly one outgoing node, and that outgoing
    node in turn has exactly one incoming node. """
    # Zero or several outgoing nodes can never be an exclusive link.
    if len(self.out_nodes) != 1:
        return False
    sole_out_node = self.get_out_nodes()[0]
    return len(sole_out_node.in_nodes) == 1

def has_multiple_segments(self) -> bool:
""" Whether the node has multiple segments, which is when the node
is merged from several individual nodes. """
Expand Down Expand Up @@ -1095,14 +1100,26 @@ def any_unaccounted_downstream_cleavage_or_stop_altering(self,
# stop and downstream cleavage gain
if len(self.get_out_nodes()) == 1:
downstream = self.get_out_nodes()[0]
boundary_node = downstream[:1]
b_vars = []
for v in boundary_node.variants:
if not v.upstream_cleavage_altering and not v.variant.is_circ_rna():
b_vars.append(v)
boundary_node.variants = b_vars
if not (downstream.seq.seq == '*' and not downstream.get_out_nodes()):
boundary_nodes.append(downstream[:1])
boundary_nodes.append(boundary_node)

# upstream cleavage gain
if len(self.get_in_nodes()) == 1:
upstream = self.get_in_nodes()[0]
boundary_node = upstream[-1:]
b_vars = []
for v in boundary_node.variants:
if not v.downstream_cleavage_altering and not v.variant.is_circ_rna():
b_vars.append(v)
boundary_node.variants = b_vars
if upstream.seq is not None:
boundary_nodes.append(upstream[-1:])
boundary_nodes.append(boundary_node)

for node in boundary_nodes:
if node.is_missing_any_variant(variants, self):
Expand Down
8 changes: 6 additions & 2 deletions moPepGen/svgraph/PeptideVariantGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,11 @@ def move_downstreams(self, nodes:Iterable[PVGNode], reading_frame_index:int
for node in nodes:
if node.seq.seq == '*' and not node.out_nodes:
continue
if node.get_last_rf_index() != reading_frame_index:
if node.get_last_rf_index() != reading_frame_index \
and len(node.get_out_nodes()) == 1 \
and not node.has_exclusive_outbond_node() \
and not len(node.get_out_nodes()[0].get_out_nodes()) == 0 \
and not node.get_out_nodes()[0].get_out_nodes()[0].seq.seq == '*':
continue
is_deletion_only_end = any(x.variant.type == 'Deletion' for x in node.variants) \
and len(node.out_nodes) == 1 \
Expand Down Expand Up @@ -746,7 +750,7 @@ def fit_into_cleavage_multiple_upstream(self, cur:PVGNode) -> T:
elif len(cur.out_nodes) == 1:
right = cur.split_node(s, cleavage=True, cleavage_range=r)
_,inbridges = self.expand_forward(cur)
branches = {list(right.out_nodes)[0]}
branches = {right}
else:
branches, inbridges = self.cross_join(cur, s, cleavage_range=r)

Expand Down
13 changes: 13 additions & 0 deletions test/files/fuzz/49/annotation.gtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
chr1 . gene 1 1514 . + . gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . transcript 1 1514 . + . gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373; is_protein_coding true;
chr1 . selenocysteine 892 894 . + . gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . exon 1 111 . + . gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . CDS 57 111 . + 1 gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . CDS 178 473 . + 1 gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . exon 178 473 . + . gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . CDS 813 897 . + 2 gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . exon 813 897 . + . gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . CDS 1280 1471 . + 1 gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . exon 1280 1514 . + . gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . UTR 1 56 . + . gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
chr1 . UTR 1472 1514 . + . gene_id FAKEG00000373; transcript_id FAKET00000373; protein_id FAKEP00000373;
29 changes: 29 additions & 0 deletions test/files/fuzz/49/brute_force.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
DRLAMCR
DRLAMCRVR
DRLATCEYPGLHLEFGVK
DRLATCEYPGLHLEFGVKLS
DRLATCVEFGVK
DRLATCVEFGVKLS
HYNGVNDRDRLAMCR
HYNGVNDRDRLATCVEFGVK
LAMCRVR
LAMCRVRCK
LATCEYPGLHLEFGVK
LATCEYPGLHLEFGVKLS
LATCVEFGVK
LATCVEFGVKLS
RSRPVSYM
RSRPVSYMCR
RSRPVSYMCRVR
RSRPVSYV
RSRPVSYVNIQDCI
SKRSRPVSYM
SKRSRPVSYMCR
SKRSRPVSYV
SKRSRPVSYVNIQDCI
SRPVSYM
SRPVSYMCR
SRPVSYMCRVR
SRPVSYMCRVRCK
SRPVSYV
SRPVSYVNIQDCI
26 changes: 26 additions & 0 deletions test/files/fuzz/49/fake_variants.gvf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
##fileformat=VCFv4.2
##mopepgen_version=1.2.0
##parser=parseVEP
##reference_index=
##genome_fasta=
##annotation_gtf=
##source=
##CHROM=<Description="Gene ID">
##INFO=<ID=TRANSCRIPT_ID,Number=1,Type=String,Description="Transcript ID">
##INFO=<ID=GENE_SYMBOL,Number=1,Type=String,Description="Gene Symbol">
##INFO=<ID=GENOMIC_POSITION,Number=1,Type=String,Description="Genomic Position">
##INFO=<ID=ACCEPTER_GENE_ID,Number=1,Type=String,Description="3' Accepter Transcript's Gene ID">
##INFO=<ID=ACCEPTER_TRANSCRIPT_ID,Number=1,Type=String,Description="3' Accepter Transcript's Transcript ID">
##INFO=<ID=ACCEPTER_POSITION,Number=1,Type=Integer,Description="Position of the break point of the 3' accepter transcript">
##INFO=<ID=OFFSET,Number=+,Type=Integer,Description="Offsets of fragments (exons or introns)">
##INFO=<ID=LENGTH,Number=+,Type=Integer,Description="Lengths of fragments (exons or introns)">
##INFO=<ID=INTRON,Number=+,Type=Integer,Description="Indices of fragments that are introns">
##INFO=<ID=START,Number=1,Type=Integer,Description="Start Position">
##INFO=<ID=END,Number=1,Type=Integer,Description="End Position">
##INFO=<ID=DONOR_START,Number=1,Type=Integer,Description="Donor Start Position">
##INFO=<ID=DONOR_END,Number=1,Type=Integer,Description="Donor End Position">
##INFO=<ID=COORDINATE,Number=1,Type=String,Description="Coordinate for Insertion or Substitution">
#CHROM POS ID REF ALT QUAL FILTER INFO
FAKEG00000373 1439 FAKEG00000373-1438-AAC-A AAC A . . TRANSCRIPT_ID=FAKET00000373;GENOMIC_POSITION=chrF-1438:1441;GENE_SYMBOL=
FAKEG00000373 1468 FAKEG00000373-1467-T-TAC T TAC . . TRANSCRIPT_ID=FAKET00000373;GENOMIC_POSITION=chrF-1467:1468;GENE_SYMBOL=
FAKEG00000373 1473 FAKEG00000373-1472-GAATATCCAGGATTGCATC-G GAATATCCAGGATTGCATC G . . TRANSCRIPT_ID=FAKET00000373;GENOMIC_POSITION=chrF-1472:1491;GENE_SYMBOL=
27 changes: 27 additions & 0 deletions test/files/fuzz/49/genome.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
>chr1
GTGTGCCTTAAGACAGCTTTCCGTGATTGTGCGGACCACTGCCGCCACTCGCCAATTATG
TGGGCTGAGACGGAGGGGTCGGGGAAATGTGTCGCTGCCAACCTCAATTATCGCTTTCCC
CTGGATGCCCCAAGGCCGGTGGACTCGATTAAGGGAATGAGTCTGACTTAGCTTACATGT
TCATACTTCCGCGATTGTACGACATGTACGAGCTTTAGTGCTATTCGTAATAAGAACTTC
TCGGAGATTCAGCCAGTTCATCTTGTCCAACTTGTATCGTCAAATCTGAGGATGAGAGGT
GCTCATAACCATCAGATAAGAGCGTTTCTTTTTCCTAGGTTTAGAGTACTGCCTTGGGAA
ACGGGGCTAGCAGTAGGCGGGACTAAGAGAGCCGTCGCATCGATTACCGCTCACTGCTCT
GAGCTTCCTAGAAATAACACCTACGCCTGTCTATCCCAGCTGTATATTATTTGGCACTAA
CTTAGCGCCACTAGCATTAACTGGGAGACCTCATTAATTATAGGGGAGTTCGAGTCTTTG
CACAAGTGGGAACAGCTATTTATGCTTGGCTAGCTAAGACGGCCCTAACTATGGGTGTAT
CCCCGACCTTCGGTGGTCTGTGGATTTGAACATACGCGGCTAGAGAGCGCATACGCCCAG
ACATCTTCAGTTATTGACCACGTTCGGTTGTTGACTCACTCTGCCGTTGGAGCCCCAAGT
TGGATAACCACGAGATTCGCATAGCTCATTATAGGATCGTTAATGGACGCGTTATTAGAC
TGGGCAAGTGATGGGTTTTAGGTGAGCGGATGCTGGAGATCCATTATAAATAACGTGCGA
CGAAATCCACTCTGCACTCTTTTAGGCCAATACAGGCGGCCAACGCCGATATGAACTCCT
TTTGTCCTGAACGCGGCCACGCTATTGGAGAACGCGGAAACCCTACATCTTGTGAGCATT
TGGACCAGCAAATCGTTGTTGACGATCGTGATTTGACCCGGAAAATACATCTAATGGATT
TACATTATTATTCGTTCTCTATATGACCCTAAGAGAGTCATGCATAGTCTTACCATTTCT
CTGAATATCATGATCCATTCAGGTAAAGTAAGGCTGATTGAATTGATCGGGTGAGTTGGA
CCCACGTCTGATGCCAGCCAAAATTGGAATCCCAGAAAAATCATCGTAAATTCAGCATCA
AACGCGTCGTGGGGACGATGGTCATAGCCGTGGGAGCTCTCCCGTAACTTTATCACAATG
ATGGGGCCGTCACTATACTGGTATTACATGGCTCAGGCGATTACAAGGCTGGTCGTGTGC
GACGGACGATACCGCCGCCTCGCCCGTGCACTCGAGTTCTTTTAAGAACTCAGGAAGGAG
GCCTATGTTTGCTCGACAAAGGACCAACAAGTCTTATAAGAGTCGTCGTCGGCATTATAA
CGGAGTAAACGATCGCGACCGGTTAGCTATGTGAATATCCAGGATTGCATCTAGAGTTCG
GTGTAAAATTAAGT
5 changes: 5 additions & 0 deletions test/files/fuzz/49/proteome.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
>FAKEP00000373|FAKET00000373|FAKEG00000373|XXX
MWAETEGSGKCVAANLNYCSYFRDCTTCTSFSAIRNKNFSEIQPVHLVQLVSSNLRMRGA
HNHQIRAFLFPRFRVLPWETGLAVGGTKRAVASITAHCSELPRNNTYACLSQLYIICWRS
IINNVRRNPLCTLLGQYRRPTPIUTGITWLRRLQGWSCATDDTAASPVHSSSFKNSGRRP
MFARQRTNKSYKSRRRHYNGVNDRDRLAM
5 changes: 5 additions & 0 deletions test/files/fuzz/50/annotation.gtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
chr1 . gene 1 584 . + . gene_id FAKEG00000416; transcript_id FAKET00000416; protein_id FAKEP00000416;
chr1 . transcript 1 584 . + . gene_id FAKEG00000416; transcript_id FAKET00000416; protein_id FAKEP00000416; is_protein_coding false;
chr1 . exon 1 130 . + . gene_id FAKEG00000416; transcript_id FAKET00000416; protein_id FAKEP00000416;
chr1 . exon 180 238 . + . gene_id FAKEG00000416; transcript_id FAKET00000416; protein_id FAKEP00000416;
chr1 . exon 410 584 . + . gene_id FAKEG00000416; transcript_id FAKET00000416; protein_id FAKEP00000416;
29 changes: 29 additions & 0 deletions test/files/fuzz/50/brute_force.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
DWASDPTGQ
DWRHCLK
DWRHCLKR
GFRPHWAVMR
GFRPHWAVMRDLYG
HCLKRYNK
HPSDGLATLSEALQ
HPSDGLAYSPAPRLTDFIECVTHK
HPSDGLAYSPAPRLTDFIECVTHKR
HPSDGLGFRPHWAVMR
HPSDGLGFRPHWAVMRDLYG
LFTCSSFNGFHR
LFTCSSFNGFHRVR
LFTCSSFNGFHRVRDA
LFTCSSFNGLS
LTDFIECVTHK
LTDFIECVTHKR
LTDFIECVTHKRGFRPHWAVMR
MDWASDPTGQ
MDWRHCLK
MDWRHCLKR
RGFRPHWAVMR
RGFRPHWAVMRDLYG
RYNKVAK
VAKLFTCSSFNGFHR
VAKLFTCSSFNGFHRVR
VAKLFTCSSFNGLS
YNKVAKLFTCSSFNGFHR
YNKVAKLFTCSSFNGLS
27 changes: 27 additions & 0 deletions test/files/fuzz/50/fake_variants.gvf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
##fileformat=VCFv4.2
##mopepgen_version=1.2.0
##parser=parseVEP
##reference_index=
##genome_fasta=
##annotation_gtf=
##source=
##CHROM=<Description="Gene ID">
##INFO=<ID=TRANSCRIPT_ID,Number=1,Type=String,Description="Transcript ID">
##INFO=<ID=GENE_SYMBOL,Number=1,Type=String,Description="Gene Symbol">
##INFO=<ID=GENOMIC_POSITION,Number=1,Type=String,Description="Genomic Position">
##INFO=<ID=ACCEPTER_GENE_ID,Number=1,Type=String,Description="3' Accepter Transcript's Gene ID">
##INFO=<ID=ACCEPTER_TRANSCRIPT_ID,Number=1,Type=String,Description="3' Accepter Transcript's Transcript ID">
##INFO=<ID=ACCEPTER_POSITION,Number=1,Type=Integer,Description="Position of the break point of the 3' accepter transcript">
##INFO=<ID=DONOR_START,Number=1,Type=Integer,Description="Donor Start Position">
##INFO=<ID=DONOR_END,Number=1,Type=Integer,Description="Donor End Position">
##INFO=<ID=START,Number=1,Type=Integer,Description="Start Position">
##INFO=<ID=END,Number=1,Type=Integer,Description="End Position">
##INFO=<ID=COORDINATE,Number=1,Type=String,Description="Coordinate for Insertion or Substitution">
##INFO=<ID=OFFSET,Number=+,Type=Integer,Description="Offsets of fragments (exons or introns)">
##INFO=<ID=LENGTH,Number=+,Type=Integer,Description="Lengths of fragments (exons or introns)">
##INFO=<ID=INTRON,Number=+,Type=Integer,Description="Indices of fragments that are introns">
#CHROM POS ID REF ALT QUAL FILTER INFO
FAKEG00000416 130 RI_195-236 C <DEL> . . TRANSCRIPT_ID=FAKET00000416;START=196;END=236;GENE_SYMBOL=;GENOMIC_POSITION=chrF:196:236
FAKEG00000416 130 A3SS_129-144-179 C <INS> . . TRANSCRIPT_ID=FAKET00000416;DONOR_START=145;DONOR_END=179;DONOR_GENE_ID=FAKEG00000416;GENE_SYMBOL=;GENOMIC_POSITION=chrF:145:179
FAKEG00000416 206 FAKEG00000416-205-AC-A AC A . . TRANSCRIPT_ID=FAKET00000416;GENOMIC_POSITION=chrF-205:207;GENE_SYMBOL=
FAKEG00000416 238 A5SS_567-584 A <DEL> . . TRANSCRIPT_ID=FAKET00000416;START=568;END=584;GENE_SYMBOL=;GENOMIC_POSITION=chrF:568:584
11 changes: 11 additions & 0 deletions test/files/fuzz/50/genome.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
>chr1
CTCGTTCTCAGCGCCCGCTGGGTTTGCATTTCTGCTAGAGTTGCGGCCTTGGACTGCACA
TGTTCGCTCCGACTCCAACGCTCGCTGTCCAGCCTGGAGAGTCTCATAAGCATCCGAGCG
ATGGACTGGCCGCGAATCGTGAGGGACATTGTCTGAAGCGCTACAATAAAGTGGCAAAGC
TATTCACCTGCTCCTCGTTTAACGGACTTTCATAGAGTGCGTGACGCATAAGCGAGGCTC
GGCACTTAGGTTACCTTTCGCTTATACCAAGTCTGAGAGTGAAAGCCGCCTAAGTTGATG
CCAGCGCTAGATGGTCAGAGCAGATGGTCACGATAGAGCGTGGCTCATGCAATCCGAGTT
ATGCTAAGTAAACCCGTCCGGGTCAAAGCGGACTTCAGCTGATGATTGGTTCCGACCCCA
CTGGGCAGTGATGCGAGACCTTTACGGATAAGAGGGGGGCGTCTGTTGGATGAGGCCCAT
GCACCGCGCCTATGCCCCTATAACGCCCACGGTTGATAACGCGAGGTCGCAGCGGAAAAT
ATCCTCTTGATTGTCACATTTGGCGCTAGATTCAGAAGATTAAT
Empty file.
14 changes: 14 additions & 0 deletions test/files/fuzz/51/annotation.gtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
chr1 . gene 1 1115 . + . gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . transcript 1 1115 . + . gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399; is_protein_coding true;
chr1 . selenocysteine 806 808 . + . gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . selenocysteine 1047 1049 . + . gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . exon 1 131 . + . gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . CDS 99 131 . + 1 gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . CDS 248 371 . + 2 gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . exon 248 371 . + . gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . CDS 770 925 . + 1 gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . exon 770 925 . + . gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . CDS 1008 1067 . + 1 gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . exon 1008 1115 . + . gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . UTR 1 98 . + . gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
chr1 . UTR 1068 1115 . + . gene_id FAKEG00000399; transcript_id FAKET00000399; protein_id FAKEP00000399;
99 changes: 99 additions & 0 deletions test/files/fuzz/51/brute_force.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
AIELSVCSAYAGPKQK
AIELSVCSAYAGPKQKFFPTITGR
AIELSVCSAYAGPKQKFFPTTTGR
AIELSVCSAYAGPKQNFFPTTTGR
AIELSVCSAYAGPKQNFFPTTTGRK
AIELSVYSAYAGPK
AIELSVYSAYAGPKQK
AIELSVYSAYAGPKQKFFPTITGR
AIELSVYSAYAGPKQKFFPTTTGR
AIELSVYSAYAGPKQNFFPTITGR
AIELSVYSAYAGPKQNFFPTITGRK
AIELSVYSAYAGPKQNFFPTTTGR
AIELSVYSAYAGPKQNFFPTTTGRK
ARRGDHSILNNDDDR
AVKGGAR
AVKGGARLTR
FFPTITGR
FFPTITGRK
FFPTITGRKATSHSDTR
FFPTTTGR
FFPTTTGRK
FFPTTTGRKATSHSDTR
GDHSILNNDDDR
GDHSILNNDDDRAVK
GDHSILNNDDDRAVKGGAR
GGARLTR
GGARLTRR
GSQLSQWHVAVTTQF
LDRSRSFFLP
LDRSRSFFLPQR
LDRSRTFFLP
LDRSRTFFLPQR
LDSKGSQLSQWHVAVTTQF
LLHIQILD
LRLDSKGSQLSQWHVAVTTQF
LTRRGPIR
MAIELSVCSAYAGPKQK
MAIELSVCSAYAGPKQKFFPTITGR
MAIELSVCSAYAGPKQKFFPTTTGR
MAIELSVCSAYAGPKQNFFPTTTGR
MAIELSVYSAYAGPK
MAIELSVYSAYAGPKQK
MAIELSVYSAYAGPKQKFFPTITGR
MAIELSVYSAYAGPKQKFFPTTTGR
MAIELSVYSAYAGPKQNFFPTITGR
MAIELSVYSAYAGPKQNFFPTTTGR
MARRGDHSILNNDDDR
MLDRSRSFFLP
MLDRSRSFFLPQR
MLDRSRTFFLP
MLDRSRTFFLPQR
MTGRWPLNCLSIVPMLDR
MTGRWPLNCLSIVPMLDRSR
MTGRWPLNCLSVVPMLDR
MTGRWPLNCLSVVPMLDRSR
QKFFPTITGR
QKFFPTITGRK
QKFFPTTTGR
QKFFPTTTGRK
QNFFPTTTGR
QNFFPTTTGRK
QNFFPTTTGRKATSHSDTR
RGDHSILNNDDDR
RGDHSILNNDDDRAVK
RRSFANTFYR
RSFANTFYR
RSFANTFYRYR
SFANTFYR
SFANTFYRYR
SFANTFYRYRAT
SFFLPQR
SFFLPQRVER
SFFLPQRVERLLHIQILD
SRSFFLP
SRSFFLPQR
SRSFFLPQRVER
SRTFFLP
SRTFFLPQR
SRTFFLPQRVER
TFFLPQR
TFFLPQRVER
TFFLPQRVERLLHIQILD
TGRWPLNCLSIVPMLDR
TGRWPLNCLSIVPMLDRSR
TGRWPLNCLSVVPMLDR
TGRWPLNCLSVVPMLDRSR
VERLLHIQILD
WPLNCLSIVPMLDR
WPLNCLSIVPMLDRSR
WPLNCLSIVPMLDRSRSFFLP
WPLNCLSIVPMLDRSRSFFLPQR
WPLNCLSIVPMLDRSRTFFLP
WPLNCLSIVPMLDRSRTFFLPQR
WPLNCLSVVPMLDR
WPLNCLSVVPMLDRSR
WPLNCLSVVPMLDRSRSFFLP
WPLNCLSVVPMLDRSRSFFLPQR
WPLNCLSVVPMLDRSRTFFLP
WPLNCLSVVPMLDRSRTFFLPQR
25 changes: 25 additions & 0 deletions test/files/fuzz/51/fake_circ_rna.gvf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
##fileformat=VCFv4.2
##mopepgen_version=1.2.0
##parser=parseCIRCexplorer
##reference_index=
##genome_fasta=
##annotation_gtf=
##source=
##CHROM=<Description="Gene ID">
##INFO=<ID=TRANSCRIPT_ID,Number=1,Type=String,Description="Transcript ID">
##INFO=<ID=GENE_SYMBOL,Number=1,Type=String,Description="Gene Symbol">
##INFO=<ID=GENOMIC_POSITION,Number=1,Type=String,Description="Genomic Position">
##INFO=<ID=OFFSET,Number=+,Type=Integer,Description="Offsets of fragments (exons or introns)">
##INFO=<ID=LENGTH,Number=+,Type=Integer,Description="Lengths of fragments (exons or introns)">
##INFO=<ID=INTRON,Number=+,Type=Integer,Description="Indices of fragments that are introns">
##INFO=<ID=ACCEPTER_GENE_ID,Number=1,Type=String,Description="3' Accepter Transcript's Gene ID">
##INFO=<ID=ACCEPTER_TRANSCRIPT_ID,Number=1,Type=String,Description="3' Accepter Transcript's Transcript ID">
##INFO=<ID=ACCEPTER_POSITION,Number=1,Type=Integer,Description="Position of the break point of the 3' accepter transcript">
##INFO=<ID=DONOR_START,Number=1,Type=Integer,Description="Donor Start Position">
##INFO=<ID=DONOR_END,Number=1,Type=Integer,Description="Donor End Position">
##INFO=<ID=START,Number=1,Type=Integer,Description="Start Position">
##INFO=<ID=END,Number=1,Type=Integer,Description="End Position">
##INFO=<ID=COORDINATE,Number=1,Type=String,Description="Coordinate for Insertion or Substitution">
##POS=<Description="Gene coordinate of circRNA start">
#CHROM POS ID REF ALT QUAL FILTER INFO
FAKEG00000399 0 CIRC-FAKET00000399-E1-E2-E3 . . . . OFFSET=0,247,769;LENGTH=131,124,156;INTRON=;TRANSCRIPT_ID=FAKET00000399;GENE_SYMBOL=;GENOMIC_POSITION=chrF:0
Loading

0 comments on commit 2c2022d

Please sign in to comment.