-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add more benchmark tests, different # of sequences
- Loading branch information
Showing
3 changed files
with
102 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,30 @@ | ||
cd ../ | ||
# TODO: set up GitHub cache | ||
SCRIPTPATH=$(dirname "$PWD") | ||
|
||
# Install seqtk | ||
cd ~ | ||
git clone https://github.com/lh3/seqtk.git | ||
cd seqtk | ||
make | ||
sudo cp seqtk /usr/local/bin/ | ||
seqtk | ||
|
||
# Download reads.fastq.gz file (1 million sequences) | ||
cd $SCRIPTPATH | ||
mkdir data | ||
cd data | ||
curl -LO https://github.com/niemasd/ViralConsensus-Paper/raw/main/data/time_memory_benchmark/reads.fastq.gz | ||
curl -LO https://github.com/Niema-Lab/ViralWasm-Consensus/raw/master/public/data/NC_045512.2.fas | ||
curl -LO https://github.com/Niema-Lab/ViralWasm-Consensus/raw/master/public/data/example.bam | ||
curl -LO https://github.com/Niema-Lab/ViralWasm-Consensus/raw/master/public/data/example.bam | ||
|
||
# Subsample: 100k reads | ||
seqtk sample -s100 reads.fastq.gz 100000 | gzip > reads_100k.fastq.gz | ||
|
||
# Subsample: 10k reads | ||
seqtk sample -s100 reads.fastq.gz 10000 | gzip > reads_10k.fastq.gz | ||
|
||
# Subsample: 1k reads | ||
seqtk sample -s100 reads.fastq.gz 1000 | gzip > reads_1k.fastq.gz | ||
|
||
# Subsample: 100 reads | ||
seqtk sample -s100 reads.fastq.gz 100 | gzip > reads_100.fastq.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,67 @@ | ||
cd ../data | ||
|
||
|
||
|
||
### TEST #1: Example data | ||
OUT_DIR=../../benchmarks/example-uploaded/cli/ | ||
mkdir -p $OUT_DIR | ||
|
||
/usr/bin/time -v viral_consensus -i example.bam -r NC_045512.2.fas -o "$OUT_DIR/consensus.fa" -q 20 -d 10 -f 0.5 -a N 2> time_output.log | ||
/usr/bin/time -v viral_consensus -i example.bam -r NC_045512.2.fas -o "$OUT_DIR/consensus.fa" -q 20 -d 10 -f 0.5 -a N 2>time_output.log | ||
|
||
grep "User time (seconds): " time_output.log | awk '{print $4}' > "$OUT_DIR/time.log" | ||
grep "Maximum resident set size (kbytes): " time_output.log | awk '{print $6}' > "$OUT_DIR/memory.log" | ||
grep "User time (seconds): " time_output.log | awk '{print $4}' >"$OUT_DIR/time.log" | ||
grep "Maximum resident set size (kbytes): " time_output.log | awk '{print $6}' >"$OUT_DIR/memory.log" | ||
|
||
rm time_output.log | ||
|
||
### TEST #2: Full reads.fastq.gz file | ||
OUT_DIR=../../benchmarks/large-dataset/cli/ | ||
### TEST #2: Full reads.fastq.gz file (1 million sequences) | ||
OUT_DIR=../../benchmarks/1000000/cli/ | ||
mkdir -p $OUT_DIR | ||
|
||
minimap2 -t 1 -a -o reads.sam NC_045512.2.fas reads.fastq.gz | ||
/usr/bin/time -v viral_consensus -i reads.sam -r NC_045512.2.fas -o "$OUT_DIR/consensus.fa" -q 20 -d 10 -f 0.5 -a N 2> time_output.log | ||
/usr/bin/time -v viral_consensus -i reads.sam -r NC_045512.2.fas -o "$OUT_DIR/consensus.fa" -q 20 -d 10 -f 0.5 -a N 2>time_output.log | ||
|
||
grep "User time (seconds): " time_output.log | awk '{print $4}' > "$OUT_DIR/time.log" | ||
grep "Maximum resident set size (kbytes): " time_output.log | awk '{print $6}' > "$OUT_DIR/memory.log" | ||
grep "User time (seconds): " time_output.log | awk '{print $4}' >"$OUT_DIR/time.log" | ||
grep "Maximum resident set size (kbytes): " time_output.log | awk '{print $6}' >"$OUT_DIR/memory.log" | ||
|
||
rm reads.sam | ||
rm time_output.log | ||
### TEST #3: reads_100k.fastq.gz file (100k sequences) | ||
OUT_DIR=../../benchmarks/100000/cli/ | ||
mkdir -p $OUT_DIR | ||
|
||
minimap2 -t 1 -a -o reads.sam NC_045512.2.fas reads_100k.fastq.gz | ||
/usr/bin/time -v viral_consensus -i reads.sam -r NC_045512.2.fas -o "$OUT_DIR/consensus.fa" -q 20 -d 10 -f 0.5 -a N 2>time_output.log | ||
|
||
grep "User time (seconds): " time_output.log | awk '{print $4}' >"$OUT_DIR/time.log" | ||
grep "Maximum resident set size (kbytes): " time_output.log | awk '{print $6}' >"$OUT_DIR/memory.log" | ||
|
||
### TEST #4: reads_10k.fastq.gz file (10k sequences) | ||
OUT_DIR=../../benchmarks/10000/cli/ | ||
mkdir -p $OUT_DIR | ||
|
||
minimap2 -t 1 -a -o reads.sam NC_045512.2.fas reads_10k.fastq.gz | ||
/usr/bin/time -v viral_consensus -i reads.sam -r NC_045512.2.fas -o "$OUT_DIR/consensus.fa" -q 20 -d 10 -f 0.5 -a N 2>time_output.log | ||
|
||
grep "User time (seconds): " time_output.log | awk '{print $4}' >"$OUT_DIR/time.log" | ||
grep "Maximum resident set size (kbytes): " time_output.log | awk '{print $6}' >"$OUT_DIR/memory.log" | ||
|
||
### TEST #5: reads_1k.fastq.gz file (1k sequences) | ||
OUT_DIR=../../benchmarks/1000/cli/ | ||
mkdir -p $OUT_DIR | ||
|
||
minimap2 -t 1 -a -o reads.sam NC_045512.2.fas reads_1k.fastq.gz | ||
/usr/bin/time -v viral_consensus -i reads.sam -r NC_045512.2.fas -o "$OUT_DIR/consensus.fa" -q 20 -d 10 -f 0.5 -a N 2>time_output.log | ||
|
||
grep "User time (seconds): " time_output.log | awk '{print $4}' >"$OUT_DIR/time.log" | ||
grep "Maximum resident set size (kbytes): " time_output.log | awk '{print $6}' >"$OUT_DIR/memory.log" | ||
|
||
### TEST #6: reads_100.fastq.gz file (100 sequences) | ||
OUT_DIR=../../benchmarks/100/cli/ | ||
mkdir -p $OUT_DIR | ||
|
||
minimap2 -t 1 -a -o reads.sam NC_045512.2.fas reads_100.fastq.gz | ||
/usr/bin/time -v viral_consensus -i reads.sam -r NC_045512.2.fas -o "$OUT_DIR/consensus.fa" -q 20 -d 10 -f 0.5 -a N 2>time_output.log | ||
|
||
grep "User time (seconds): " time_output.log | awk '{print $4}' >"$OUT_DIR/time.log" | ||
grep "Maximum resident set size (kbytes): " time_output.log | awk '{print $6}' >"$OUT_DIR/memory.log" | ||
|
||
|
||
|
||
rm reads.sam | ||
rm time_output.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters