Skip to content

Commit

Permalink
Improved protein annotations with plasmids coordinates
Browse files Browse the repository at this point in the history
  • Loading branch information
cpauvert committed May 19, 2023
1 parent 09efb8e commit 189e77d
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ The columns of the output table are detailed in the next section with examples.

## Changelog

* v7.0: Concatenate genome and plasmid sequences (if any). Add plasmid protein annotations.
* v6.1: Fix conda environments setup by updating snakemake wrappers.
* v6.0: Checksums for both the genome archive and FASTA file are computed. Improved the table. Assembly with unpaired reads is possible.
* v5.3.2: Fixes for conda environments
Expand Down
40 changes: 38 additions & 2 deletions workflow/rules/quality_check.smk
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,32 @@ rule rewrite_genome_headers:
"""


rule get_contig_list:
    # Turn the FASTA headers of the isolate's genome into a five-column,
    # tab-separated table with one row per header line.
    # Each row holds the header text (without its '>') twice, followed by the
    # literal values "contig", "linear" and "-".
    input:
        "results/quality_check/{isolate}/{isolate}.genome.fa",
    output:
        "results/quality_check/{isolate}/{isolate}.genome.tsv",
    threads: 1
    shell:
        # The trailing backslashes are consumed by the Python string literal,
        # so the shell receives the whole pipeline as a single line.
        """
        grep '>' {input} \
            | sed 's/>//' \
            | awk 'BEGIN {{ OFS="\t" }} {{ print $0, $0, "contig", "linear", "-" }}' \
            > {output}
        """


rule get_plasmid_list:
    # Turn the FASTA headers of the reconstructed plasmid cycles into a
    # five-column, tab-separated table with one row per header line.
    # Each row holds the header text (without its leading '>') twice, followed
    # by the literal values "plasmid", "circular" and "-".
    #
    # A single awk program is used instead of `grep | sed | awk` because
    # Snakemake runs shell commands in bash strict mode (`set -euo pipefail`):
    # grep exits with status 1 when no line matches, which would fail the job
    # whenever the cycles FASTA contains no sequence. awk exits 0 in that case
    # and simply writes an empty table.
    # NOTE(review): assumes an isolate without plasmids yields an empty
    # assembly_graph.cycs.fasta -- confirm against the plasmid reconstruction rule.
    input:
        "results/plasmid_reconstruction/{isolate}/assembly_graph.cycs.fasta",
    output:
        "results/plasmid_reconstruction/{isolate}/{isolate}.plasmids.tsv",
    threads: 1
    shell:
        # Only lines that START with '>' are headers; the pattern is anchored
        # so that a stray '>' elsewhere in the file is never picked up.
        """
        awk 'BEGIN {{ OFS="\t" }} /^>/ {{ sub(/^>/, ""); print $0, $0, "plasmid", "circular", "-" }}' {input} > {output}
        """


rule combine_genome_plasmids:
input:
"results/quality_check/{isolate}/{isolate}.genome.fa",
Expand All @@ -44,6 +70,14 @@ rule combine_genome_plasmids:
"cat {input} > {output}"


# Build the combined per-isolate table by reusing the rule
# `combine_genome_plasmids` with only its input and output files overridden;
# every other directive is inherited from that parent rule.
# NOTE(review): the parent presumably concatenates its inputs in order
# (`cat {input} > {output}`) -- confirm against the full parent definition.
# Inputs: the genome contig table followed by the plasmid table.
# Output: the combined table, consumed as the `replicon` input of
# `bakta_for_annotation`.
use rule combine_genome_plasmids as create_replicon_list with:
input:
"results/quality_check/{isolate}/{isolate}.genome.tsv",
"results/plasmid_reconstruction/{isolate}/{isolate}.plasmids.tsv",
output:
"results/quality_check/{isolate}/{isolate}.combined.tsv",


rule checkM_for_quality:
input:
"results/quality_check/{isolate}/{isolate}.combined.fa",
Expand Down Expand Up @@ -82,7 +116,8 @@ rule download_bakta_db:

rule bakta_for_annotation:
input:
"results/quality_check/{isolate}/{isolate}.combined.fa",
fasta="results/quality_check/{isolate}/{isolate}.combined.fa",
replicon="results/quality_check/{isolate}/{isolate}.combined.tsv",
output:
"results/quality_check/{isolate}/bakta/{isolate}.tsv",
log:
Expand All @@ -99,7 +134,8 @@ rule bakta_for_annotation:
--prefix {wildcards.isolate} \
--locus-tag {wildcards.isolate} \
--output {params.outdir} \
--threads {threads} {input} &> {log}
--replicons {input.replicon} \
--threads {threads} {input.fasta} &> {log}
"""


Expand Down

0 comments on commit 189e77d

Please sign in to comment.