added emmtyper template

Daniel-VM · Jul 26, 2024 · dd8725c · dd8725c
1 parent 77ae5e9
commit dd8725c
Showing 1 changed file with 78 additions and 0 deletions.
diff --git a/bu_isciii/templates/characterization/ANALYSIS/ANALYSIS01_CHARACTERIZATION/04-emmtyper/lablog b/bu_isciii/templates/characterization/ANALYSIS/ANALYSIS01_CHARACTERIZATION/04-emmtyper/lablog
@@ -0,0 +1,78 @@
+#!/bin/sh
+
+# Set-up for the emmtyper step: creates the working folders, lists the gzipped
+# assemblies to process and defines the variables (ASSEMBLY_LIST, num_files,
+# scratch_dir) that are expanded into the SLURM scripts written further down.
+
+# Create folders
+mkdir -p data
+mkdir -p .slurm_logs_NC
+
+# Find all .gz files and write them to a file list (one path per line)
+# FIXME: set path to assembly files (tmp: assembly template file path)
+ASSEMBLY_LIST=data/assembly_file_list.txt
+find ../../../../assembly/ANALYSIS/*_ASSEMBLY01/results/assembly/unicycler/*.fasta.gz > "$ASSEMBLY_LIST"
+
+# Get the number of files (it becomes the upper bound of the job array)
+num_files=$(wc -l < "$ASSEMBLY_LIST")
+
+# Stop when no assembly was found: an array range of "1-0" is not a valid
+# job array and there would be nothing to type anyway.
+if [ "$num_files" -lt 1 ]; then
+    echo "ERROR: no *.fasta.gz assemblies were found; cannot set up the emmtyper jobs." >&2
+    exit 1
+fi
+
+# Current directory with /data/bi/scratch_tmp rewritten to /scratch; it is used
+# as the working directory (--chdir) of the generated jobs.
+scratch_dir=$(printf '%s\n' "$PWD" | sed "s/\/data\/bi\/scratch_tmp/\/scratch/g")
+
+# STEP 1: Set up jobarray to unzip fasta files
+# The heredoc delimiter is unquoted on purpose: $num_files, $scratch_dir and
+# $ASSEMBLY_LIST are expanded now, while every backslash-escaped expression is
+# written literally and evaluated later by each array task.
+cat <<EOF > _00_unzip_jobarray.sbatch
+#!/bin/bash
+#SBATCH --job-name=unzip_fasta
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=2
+#SBATCH --mem=8G
+#SBATCH --time=2:00:00
+#SBATCH --partition short_idx
+#SBATCH --array=1-$num_files
+#SBATCH --chdir $scratch_dir
+#SBATCH --output .slurm_logs_NC/slurm-%A_%a.out
+#SBATCH --error .slurm_logs_NC/slurm-%A_%a.err
+
+# Get the file to process: the line of the list matching this task's index
+file=\$(sed -n "\${SLURM_ARRAY_TASK_ID}p" "$ASSEMBLY_LIST")
+
+# Unzip the file to the destination directory
+# (-c writes to stdout and leaves the .gz in place, so -k is not needed)
+gzip -dc "\$file" > "data/\$(basename "\$file" .gz)"
+
+EOF
+
+# FIXME: symb links to BLAST DATABASE?
+# FIXME: conda & singularity load
+# STEP 2: Set up the batch script that runs emmtyper on the unzipped fasta files.
+# The heredoc is unquoted: $scratch_dir and the date in the log name are
+# expanded when this lablog runs, not when the job starts.
+cat <<EOF > _01_emmtyper.sbatch
+#!/bin/bash
+#SBATCH --job-name emmtyper
+#SBATCH --ntasks 1
+#SBATCH --cpus-per-task 4
+#SBATCH --mem 24G
+#SBATCH --time 4:00:00
+#SBATCH --partition short_idx
+#SBATCH --chdir $scratch_dir
+#SBATCH --output ./$(date '+%Y%m%d')_emmtyper.log
+
+# module load singularity
+# conda activate emmtyper-0.2.0
+
+# create results folder
+mkdir -p 01-typing
+mkdir -p 01-typing/tmps
+
+# Run emmtyper
+# Inputs are the fasta files the unzip job array wrote into data/, which is
+# relative to the --chdir directory shared by both jobs.
+# FIXME: replace 'path_to_blastdatabase' with the real BLAST database path.
+emmtyper \\
+    -w blast \\
+    --keep \\
+    --blast_db 'path_to_blastdatabase' \\
+    --percent-identity 95 \\
+    --culling-limit 5 \\
+    --output 01-typing/results_emmtyper.out \\
+    --output-format verbose \\
+    data/*.fasta
+
+# Collect the *.tmp files left in the working directory
+# (presumably the intermediate files retained by --keep; confirm)
+mv *.tmp 01-typing/tmps
+
+EOF
+
+# Write the launcher that submits both steps: the unzip job array first, then
+# the emmtyper job with an afterok dependency on the captured job id.
+# The quoted delimiter keeps every $ literal in the generated script.
+cat <<'EOF' > _ALLSTEPS_emmtyper.sh
+#!/bin/bash
+unzip_job_id=$(sbatch _00_unzip_jobarray.sbatch | awk '{print $4}')
+sbatch --dependency=afterok:${unzip_job_id} _01_emmtyper.sbatch
+EOF