#!/bin/sh

# lablog for the 04-emmtyper step
# (bu_isciii/templates/characterization/ANALYSIS/ANALYSIS01_CHARACTERIZATION/04-emmtyper).
#
# Running this script submits nothing. It only generates, in the current
# directory:
#   data/assembly_file_list.txt  one gzipped assembly FASTA path per line
#   _00_unzip_jobarray.sbatch    job array: decompress one assembly per task
#   _01_emmtyper.sbatch          single job: run emmtyper on data/*.fasta
#   _ALLSTEPS_emmtyper.sh        submits both, chaining them with afterok

# Create folders
mkdir -p data
mkdir -p .slurm_logs_NC

# Collect the gzipped assemblies into a file list (one path per line).
# FIXME: set path to assembly files (tmp: assembly template file path)
ASSEMBLY_LIST=data/assembly_file_list.txt
find ../../../../assembly/ANALYSIS/*_ASSEMBLY01/results/assembly/unicycler/*.fasta.gz > "$ASSEMBLY_LIST"

# Number of files = size of the job array. Whitespace is stripped because
# some wc implementations pad their output.
num_files=$(wc -l < "$ASSEMBLY_LIST" | tr -d '[:space:]')

# An empty list would produce "--array=1-0", which is not a valid array
# specification, so stop before generating unusable job scripts.
if [ "${num_files:-0}" -lt 1 ]; then
  echo "ERROR: no *.fasta.gz assemblies found; emmtyper job scripts were not generated." >&2
  exit 1
fi

# Jobs run from the /scratch view of the current directory.
scratch_dir=$(printf '%s\n' "$PWD" | sed 's|/data/bi/scratch_tmp|/scratch|g')

# STEP 1: Set up jobarray to unzip fasta files.
# Unquoted heredoc: $num_files, $scratch_dir and $ASSEMBLY_LIST are expanded
# now; the backslash-escaped \$ expressions are evaluated when the job runs.
cat <<EOF > _00_unzip_jobarray.sbatch
#!/bin/bash
#SBATCH --job-name=unzip_fasta
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --mem=8G
#SBATCH --time=2:00:00
#SBATCH --partition short_idx
#SBATCH --array=1-$num_files
#SBATCH --chdir $scratch_dir
#SBATCH --output .slurm_logs_NC/slurm-%A_%a.out
#SBATCH --error .slurm_logs_NC/slurm-%A_%a.err

# Get the file to process
file=\$(sed -n "\${SLURM_ARRAY_TASK_ID}p" "$ASSEMBLY_LIST")

# Unzip the file to the destination directory (-c leaves the .gz untouched)
gzip -dc "\$file" > "data/\$(basename "\$file" .gz)"

EOF

# FIXME: symb links to BLAST DATABASE?
# FIXME: conda & singularity load
# STEP 2: Set up the sbatch file that runs emmtyper on the unzipped assemblies.
# The date in the log file name is fixed at generation time, not at run time.
# Input is data/*.fasta because this job uses the same --chdir as the unzip
# job, which writes its output into data/.
cat <<EOF > _01_emmtyper.sbatch
#!/bin/bash
#SBATCH --job-name emmtyper
#SBATCH --ntasks 1
#SBATCH --cpus-per-task 4
#SBATCH --mem 24G
#SBATCH --time 4:00:00
#SBATCH --partition short_idx
#SBATCH --chdir $scratch_dir
#SBATCH --output ./$(date '+%Y%m%d')_emmtyper.log

# module load singularity
# conda activate emmtyper-0.2.0

# create results folder
mkdir -p 01-typing
mkdir -p 01-typing/tmps

# Run emmtyper
emmtyper \\
    -w blast \\
    --keep \\
    --blast_db 'path_to_blastdatabase' \\
    --percent-identity 95 \\
    --culling-limit 5 \\
    --output 01-typing/results_emmtyper.out \\
    --output-format verbose \\
    data/*.fasta

mv *.tmp 01-typing/tmps

EOF

# STEP 3: Write the launcher. The quoted delimiter keeps every \$ literal so
# the job id is captured when the launcher itself is executed.
cat <<'EOF' > _ALLSTEPS_emmtyper.sh
#!/bin/bash
unzip_job_id=$(sbatch _00_unzip_jobarray.sbatch | awk '{print $4}')
sbatch --dependency=afterok:${unzip_job_id} _01_emmtyper.sbatch
EOF