diff --git a/cudaaligner/CMakeLists.txt b/cudaaligner/CMakeLists.txt index 781394386..e5f1cc684 100644 --- a/cudaaligner/CMakeLists.txt +++ b/cudaaligner/CMakeLists.txt @@ -18,6 +18,9 @@ set(MODULE_NAME cudaaligner) +# Data subdirectory: generates cudaaligner_file_location.hpp for the tests +add_subdirectory(data) + # Project specific NVCC flags if((CUDA_VERSION_MAJOR GREATER 10) OR (CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR GREATER 0)) set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++14 -arch=compute_60 --expt-relaxed-constexpr") @@ -49,7 +52,8 @@ target_include_directories(${MODULE_NAME}_internal INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/src ) -target_link_libraries(${MODULE_NAME} gwbase cub libcudacxx) + +target_link_libraries(${MODULE_NAME} gwbase cub gwio libcudacxx) target_compile_options(${MODULE_NAME} PRIVATE -Wall -Wextra) if (gw_optimize_for_native_cpu) diff --git a/cudaaligner/data/CMakeLists.txt b/cudaaligner/data/CMakeLists.txt new file mode 100644 index 000000000..29d35194b --- /dev/null +++ b/cudaaligner/data/CMakeLists.txt @@ -0,0 +1,8 @@ +# Exposes the cudaaligner test-data directory to C++ as CUDAALIGNER_BENCHMARK_DATA_DIR via a generated header. + +cmake_minimum_required(VERSION 3.10.2) +# The FASTA/result fixtures live in this directory, next to this CMakeLists.txt. +set(CUDAALIGNER_BENCHMARK_DATA_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cudaaligner_file_location.hpp.in" "${PROJECT_BINARY_DIR}/data/cudaaligner_file_location.hpp" @ONLY) +# Publish the generated header's directory; cudaaligner/tests/CMakeLists.txt reads this property to extend its include path. +set_property(GLOBAL PROPERTY cudaaligner_data_include_dir "${PROJECT_BINARY_DIR}/data") diff --git a/cudaaligner/data/cudaaligner_file_location.hpp.in b/cudaaligner/data/cudaaligner_file_location.hpp.in new file mode 100644 index 000000000..5a1835371 --- /dev/null +++ b/cudaaligner/data/cudaaligner_file_location.hpp.in @@ -0,0 +1,4 @@ + +#pragma once + +#cmakedefine CUDAALIGNER_BENCHMARK_DATA_DIR "@CUDAALIGNER_BENCHMARK_DATA_DIR@" diff --git a/cudaaligner/data/query_AlignerGlobal.fasta b/cudaaligner/data/query_AlignerGlobal.fasta new file mode 100644 index 000000000..c7b591e95 --- /dev/null +++ b/cudaaligner/data/query_AlignerGlobal.fasta @@ -0,0 +1,70 @@ +>test_case_1 +TTAT +>test_case_2 +AAAAAAAAA +>test_case_3 +ATAAAAAAAA 
+>test_case_4 +GCTAG +>test_case_5 +GCTAG +>test_case_5 +ACTG +>test_case_5 +T +>test_case_6 +TTAT +>test_case_6 +AAAAAAAAA +>test_case_6 +ATAAAAAAAA +>test_case_6 +GCTAG +>test_case_6 +GCTAG +>test_case_6 +ACTG +>test_case_6 +T +>test_case_6 +TTAT +>test_case_6 +AAAAAAAAA +>test_case_6 +ATAAAAAAAA +>test_case_6 +GCTAG +>test_case_6 +GCTAG +>test_case_6 +ACTG +>test_case_6 +T +>test_case_6 +TTAT +>test_case_6 +AAAAAAAAA +>test_case_6 +ATAAAAAAAA +>test_case_6 +GCTAG +>test_case_6 +GCTAG +>test_case_6 +ACTG +>test_case_6 +T +>test_case_6 +TTAT +>test_case_6 +AAAAAAAAA +>test_case_6 +ATAAAAAAAA +>test_case_6 +GCTAG +>test_case_6 +GCTAG +>test_case_6 +ACTG +>test_case_6 +T \ No newline at end of file diff --git a/cudaaligner/data/query_AlignmentImpl.fasta b/cudaaligner/data/query_AlignmentImpl.fasta new file mode 100644 index 000000000..1a157f8af --- /dev/null +++ b/cudaaligner/data/query_AlignmentImpl.fasta @@ -0,0 +1,8 @@ +>test_case_1 +AAAA +>test_case_2 +CGATAATG +>test_case_3 +GTTAG +>test_case_4 +GTTACA \ No newline at end of file diff --git a/cudaaligner/data/query_ApproximateBandedMyers.txt b/cudaaligner/data/query_ApproximateBandedMyers.txt new file mode 100644 index 000000000..858306747 --- /dev/null +++ b/cudaaligner/data/query_ApproximateBandedMyers.txt @@ -0,0 +1,2 @@ +>test_case_1 +AGGGCGAATATCGCCTCCCGCATTAAGCTGTACCTTCCAGCCCCGCCGGTAATTCCAGCCGGTTGAAGCCACGTCTGCCACGGCACAATGTTTTCGCTTTGCCCGGTGACGGATTTAATCCACCACAG diff --git a/cudaaligner/data/query_NeedlemanWunschImplementation.fasta b/cudaaligner/data/query_NeedlemanWunschImplementation.fasta new file mode 100644 index 000000000..2ac993010 --- /dev/null +++ b/cudaaligner/data/query_NeedlemanWunschImplementation.fasta @@ -0,0 +1,12 @@ +>test_case_1 +ACTG +>test_case_2 +ATCG +>test_case_3 +ATG +>test_case_4 + +>test_case_5 +ACTG +>test_case_6 +BDEF \ No newline at end of file diff --git a/cudaaligner/data/query_cudaaligner_cases.fasta b/cudaaligner/data/query_cudaaligner_cases.fasta new file mode 100644 index 
000000000..698e98da4 --- /dev/null +++ b/cudaaligner/data/query_cudaaligner_cases.fasta @@ -0,0 +1,22 @@ +>read_query_0 +CGTCGTCGTC +>read_query_1 +CGTCGTCGTC +>read_query_2 + +>read_query_3 +CGTCGTCGTC +>read_query_4 +C +>read_query_5 +CGTCGTCGTC +>read_query_6 +AGTCGTCGTCCGTAATCGTCCGTCGTCGTCGA +>read_query_7 +AGTCGTCGTCCGTAATCGTCCGTCGTCGTCGTA +>read_query_8 +GTCGTCGTCCGTCGTCGTCCGTCGTCGTCGAAAACGTCGTCCGTCGTCGTCCGTCGTCGAAAACGTCGTCGTCCGTAGTCGTCCGACGTCGTCGTC +>read_query_9 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>read_query_10 +AGGGCGAATATCGCCTCCCGCATTAAGCTGTACCTTCCAGCCCCGCCGGTAATTCCAGCCGGTTGAAGCCACGTCTGCCACGGCACAATGTTTTCGCTTTGCCCGGTGACGGATTTAATCCACCACAGTTGCGACTGATAGCGTCCGGATTTCAAACGCGCCCAAACGTAAACAGGGTGAGGCATCTTCCCGCGCCAGCGCACTTCCTGCCAGATCCTCACCAACGCGTCGCGGAAAGGCATCTAAATTGCATTCTGCCCTGCCAGCCAGCGGCGGATAATCGCCGCGCGGCGGGCATCACTGTATCGCCAGCATTGGCACAATCTGCGGCGTCCCCTGCGGCGATTGACGGTGTGCTAAATCATCTGCCAACCTCAGTTCATCCAGCAGGCTCTCTTGTTCAGCACAAAGTGCGGCGCTGCGAGCCGTTGCTTCGGCAAAATGCGGCCAACGCTGCTGCCAATAACGGCACTACGCGCAGGCGCAGAAAGTTACGATCGTATGAGTCGTCCTGATTACTTTCGTCTTCAATCCAGCGTAAATCCCATACTGACGCGCCCTGCACCAGTTCCCCGCGTGCGGGCGAGCAACGGGCGAATAAGCCGCGTTCCGGCAAAACTCCGAGACTTCCGCCATAGCCGAAAGCCCGGCAGGGCCACTGCCGCGTTTAGCGCCAGCAGAAAGGTTTCACATTGATCGTCGAGATGTTGCGCGGTGACCAGCACTTCACCGGGCAACAAGGTGCGGGCAAATGCCTGATAACGTGCCTGCCGCGCCTGGGCCTCAATGCCCAGTCCTTCTTGCGCAAGTTGTACGCGTTCGACCACCAGCGGCACCACTGTTGGCAGACGTTTTCGCAATGCGTAACCCAGGCATCGGCATTGGCACTTAAGCCGTGATGCACATGAATAGCGCGCAGAGCGACACCGGATTTTCCGTCGCACTGCACCAACTGATGCAGCAGAACGGTGGAGCCAAGCCCGCCGCTAAAGGCCACCAGAATCTGGCGTGAGGTGAGAAGTTGTCTATTGAGCGTGAGTGTCATGATGGTGCGATTTTACAGTAGCAATGCCCGGCACGTTACCGGGCAAATGCGGCGATGACAAACCTTACTGCTCCATACAGTTCAGCAGCCCGTCCGGATCGTTAAAGAAGGTGAAGCGTTTTTTTGCGTGTATGGATCGACACGGATGGTTTCACTTCACGTTATGGCTTTCAAAGGTGCGCCACTGCCGCATCGATGTCATCAACGCTAAAAGCCAGATGACGCAGACCGCAAGCTTCCGGTCGGCTGGGTCGTTCCGGCCAGGAACGGAAATGAGAAAACTCAATCACATATTGCCCATTAAGCGCCGATCCCCTTTCCATGAGTCGCGCGCTTCGCGATAGACTTCGCTGCAGCGTGAAACC
GAATATCGCGGTGAAAGCTTTTGCTCACCGCATAATCCGTCGCAATAATCGCAATATGGTGAACCTGTTTTAAACCCAGCATAGCGTCTCCTTTGTTGTTAACAGCACGTTACTCGCCCGGAAGCCGCTCTGGCAAGTTATCCCGCCATTTTTAGGACTCGTACGCGGTAAACGCCGTCTTCGTCCTGTTTCGCGCCGTGAATATCCGTTTCAAAACCGGGGTAATGTTGCCCGACGGAACAAAGCATCGTGAAAGAAATCGAGTATAATGCGCTCTCTTTGGTCAGCATTTCTCCAGGCATCAACAGCGGTACGCCCGGTGGATAAGGCAGGATCATATTGCCGGAAAATCTCTACCGACCAGTTGTTCCAGCGCAATGGTTTCTACTTCGCCTTTATTTTGTCGTTGCCATGCCTGATGTGGCGTCATGATCATCTCCGGCAAAGTATCGAATGCCCGCAACATCAAACCGGGAAGATCGTGTTTACGAATCAGCTTATGGATCCCTTGTGCCAGATCCTGAATACGCATATTGCGGTAGAAATCGGGATCTTCTGCATAGAGATCGGGTAGCATATTTTTGATCCGCAGGTTGAGATCGTAAGAGCGTTTGAATTCCGTCAACCCACGCAATAATCCCATTGCTTTGGTTTTATCGATGCCAATACTAAAGAGAAACAGCAGGTTATAAGGGCCGGTTTTCTCTACTACGATCCCACGTTCGTCGAGGAATTTTGCTACCAGCGCCGCCGGGATCCCCTCCTCGCTCATATTGCCCTGCTCGTCCATCCCCGGTGTCAAAATAGTGACTTTAACCGGATCCGAGAAACATATGATCAGCATCCGCATCGTTAAAGCCGTGCCACTGTTCGCCAGGCGCAACGGGCCAGCATTCGGCTTCATCCACCTGCGGCGGTTGCCAGATATCGAAAAACCAACCGTCAGACTCTTCCCGCAGCCGCTGGACCTCTTTGCGAAAATGCAGAGCTCGTTCTACTGAACGGTTAATCAGCCGTTTGCGGATTACCACGCAGCATCGCCGCCGCCGTCTCAACCGAAGCAATGGGATAACTGGGCGAGGTGGTGGTATCATCATAAAGGCTTCGTTAAAGGCCTCTTCGTCATACTCGCCTTTGTGTGGATCCAGCGAAGCCTGCGATAACGCCGCCAGCATTTTGTGGGTCGATTGCGTTTCGAAGATCACTTTTCCCGCAACACGCTCGCCGCTCATACCACTTTTACCACCTGGTAGATCGGATGAAAATGGGTGTACGGCACCCAGGCAGAATCGAAGTGAATCGACAGGACATCCAGCGTCTGTTTGATCCAGTCGGTGTTGTAGAGCAAGCCATCATAGGTGGAGTTGGTGATCACCGCATGAACCGGCCATTGTGCTTGCGTGGTAGCAGCGACTTTCTCTTCGATGCTGTCGCGAGTAAATTCACGGCGCGGGATCCCACAAAATCCCCAACGCATTACGCGTCGGTTTTTCAGCCAGACTGGCACTACATCGTTCATCATCAACAGATGCGCCAGCGATTATGACAATTGCGGTCGATCAACAGCAGCGTACTGCCGGATGGCGCGGCGTTTATACCCACAATTTTGTTCGACGTCGATGTTCCGTTGGTAACGATATAACTCTGTTCCGCGCCAAAAGTCCGCGCGATGTACTCTTCCGCTTCCAGGTGTGGCCCGGTGTGGTCGAGCAACGAACCAAGCTCGGTGACCGAAATAGAGACATCAGCCTTAAGAGTATTCCCGCCGAAAAAATCATAAAACAGACAGCCAACCGGGCTTTTTGATATGCAGTACCGCCCATATGCCCCGGCGTACAAAGGGTGTACTTCCGCTCTTTGACGTAGGTAAACAAGCTTTCGTGAACGGCGGTGTAATGTTATCAAGATATTCGTCGGTGTACTGACGCATACGAATGGCGATATCTTCCGCCTGCCCCAGCGCATATTCAAAAAACCAGAGCGCCATCCGCATATCCTGC
ACGCTGACATCCATCGTCGAGTGGGTGTTGATGAAGGCATAAAGCGGGAGATATTCATTAAGCTGATTGATATCGCTACATAAATCGAGACTGTACTCATCCCAGTCAAAATCACGCCGCAAATTCGGGAGTTATACTCGATAAATTTCAGCAATCAACGCTGTTTTTGTGTGAATAATCTGAAAGCCTTGCGCCACCAGCGCCGACTCCAGTTCTTTGATGGGCTCATCTTTATAAAAGACGCCATGCGGTCCCATAATGGCAATGATGTTCATGTGTTCCTCCTGGAAAATCCTTCCTTAATCATAGCCTGCTCAAACCGTGGCGATAAAAGGGTTACCGGTGACCCTTTTTCAGAACTTTGCAATTACGCGTAACCGTAGCTCATCAGGCGCTGATAACGACGGAATTTTTTAAATCTTCAGTGCTTAACACGTCGAGGACGGCCAGATCCGCCAGCAGTTGCGCTTTCAACGATGCCGCCATCGCTTCCGGGTTACGGTGAGCACCACCCAGTGGTTCCGGGATGATGGAGTCGATCAGTTTCAGTTCTTTCAGACGCGGAGCAATGATACCCATCCTTCAGCCGCCAGCGCACAGCCGGCACTCTTCCACAGAATGGACGCACAACCTTCCGGCGAGATAACCGGAATAGGTGCTGTATTGCAGCATATTCGCTTTATCGCCACGCCAATCGCCAGCGCACCGCCAGAACCACCTTCACCGATAACCGTACAAACTACCGGTACGCCGGGCGAGACATTTCACGCAGGTTGCGTGCAATGGCTTCCAGACTGACCACGCTCTTCTGCGCCCACGCGGGGATAAGCCCCGGGGTGTCGGTAAAGGTGATGATAGGCATCTTAAAGCGTTCAGCCATTTGCATCCAGACGCAGTGCTTTGCGGTAACCTTCTGGCGCTGGCATACCAAAGTTACGGCGAATTTTTTCTTTGGTTTCGACCTTTTGATGATAATGATCATCACCGGACGACCATCGAGACGGGCGATACCACCGACGATAGCTTTATCGTCTGCATACGCGCGGTCGCCAGCCAGTTCGTCAAATTCATCAAATGCCAGGCGAACGTAATCCAGGGTATAAGGACGCGCTGTGGATGGCGTGCCAGTTGCGCAATCTGCCATGCACCGAGATCGGCGAAGATTTGTTTGCGGTGTCAGTTCTACGCTTTTTTCACGCAGACGATGCACTTCTTCATCGATGTTAATATCAGTTTCTCATCCTGACGGCTAACCTGCCAGTCAGAGAATCATTTTCGCTTCCAGCTCTGCAATATTGTTCAAAATCAGGAAATTCAGACTCATAGTATTCCTGTATTAGTCAAACTCCAGTTCCACCTGCTCCGAACCAATGAGGCCACGGAGATCGTTTAATAAACGATCGCTCGGAGAGACACGCCACGTCGCGCCAAAACGCAACCGCGCGCATGCATCCGCCCTCTGATAGTAGGAATGTACTGGAATTGTCCCAGAGCGGTGGGGTTCCAGAGACTGACGGAGTCGGTTTAAAGCTGGTCATCAATTTGCCTGTCCGTCAGCGAGATAGCAAGCCCGCGAGCATATTTTTCCCGGGCTTCGTCAATATCCATCACTTCGCGAGCGGTCATTTTAAGCCCACCGCTGAAGTCATCAAAGCTGACCTGTCCGCTGACGATAAGTATGCAGTCTTTCCAGCAATTGCTGGTATTTATCCAGGGGCGTCAGTAAGCAACATCACTTCCAGCCGCCGGAACGGTCATCCAGCGTGCAGATACCGATACGATTGCCGCGCTTGGTGACCATAACCCGCGCGGCAACAACTTAACCCCGCAGCCGTGATGACTTTACCACGTTCTGTCGGGTGCATGTCTTTCAGCCGCCGCCTCCGACATAACGCTCAATCTCTTTTAAATACTGGTTGATAGGGTGTCCGGTCAGGTACAGGCCTAACGTTTCACGTTCCCCATCTAATACCACCTGCTCCGGCCACGGTTGGCAGCTG
GCGTAGGATTGTTCAATTTGTTCCGGCTCTTCGGCCAGCACGCCGAACATATCGGCCTGACCGATAGCTTCCGCTTTCGCGTGTTGATCTGCCGCTTTTAACGCATCGCCCAGCGAGTTCATCAGCGCTGCGCGATGTGGCCAAGACGGTCAAACGCCCCGGACATGATCAGTTTTTCCAGCACGCGACGGTTCAACTTTTTGGTGTCGGTACGGGCGCAGAGATCAAACAGTTCGCGGAAGTAGCCGCCTTTATTACGGGCTTCGATGATGGCCTCAATCGGACCTTCACCGACCCCGATCGCGCCAATACCATACACGATTTCGCCGTCGTCGTTGACGTGGAAATGGTAAAGACCGGAGTTTATATCTGGTGGCAGGATTTTCAG diff --git a/cudaaligner/data/result_AlignerGlobal.fasta b/cudaaligner/data/result_AlignerGlobal.fasta new file mode 100644 index 000000000..1dc1b3f10 --- /dev/null +++ b/cudaaligner/data/result_AlignerGlobal.fasta @@ -0,0 +1,35 @@ +test_case_1 4M 3 +test_case_2 1M1D8M 1 +test_case_3 1M1I8M 1 +test_case_4 3M1D1M1I 3 +test_case_5 3M1D1M1I 3 +test_case_5 4M 0 +test_case_5 1M 1 +test_case_6 4M 3 +test_case_6 1M1D8M 1 +test_case_6 1M1I8M 1 +test_case_6 3M1D1M1I 3 +test_case_6 3M1D1M1I 3 +test_case_6 4M 0 +test_case_6 1M 1 +test_case_6 4M 3 +test_case_6 1M1D8M 1 +test_case_6 1M1I8M 1 +test_case_6 3M1D1M1I 3 +test_case_6 3M1D1M1I 3 +test_case_6 4M 0 +test_case_6 1M 1 +test_case_6 4M 3 +test_case_6 1M1D8M 1 +test_case_6 1M1I8M 1 +test_case_6 3M1D1M1I 3 +test_case_6 3M1D1M1I 3 +test_case_6 4M 0 +test_case_6 1M 1 +test_case_6 4M 3 +test_case_6 1M1D8M 1 +test_case_6 1M1I8M 1 +test_case_6 3M1D1M1I 3 +test_case_6 3M1D1M1I 3 +test_case_6 4M 0 +test_case_6 1M 1 \ No newline at end of file diff --git a/cudaaligner/data/result_AlignmentImpl.txt b/cudaaligner/data/result_AlignmentImpl.txt new file mode 100644 index 000000000..0854eedcf --- /dev/null +++ b/cudaaligner/data/result_AlignmentImpl.txt @@ -0,0 +1,4 @@ +test_case_1;AAAA-;xx|x ;TTATG;4M1I;2X1=1X1I;true;mismatch/mismatch/match/mismatch/insertion/ +test_case_2;CGATAATG; x|||| ;-CATAA--;1D5M2D;1D1X4=2D;true;deletion/mismatch/match/match/match/match/deletion/deletion/ +test_case_3;--GT-TAG--; || ||| ;AAGTCTAGAA;2I2M1I3M2I;2I2=1I3=2I;true;insertion/insertion/match/match/insertion/match/match/match/insertion/insertion/ +test_case_4;G-TTACA;| || 
||;GATT-CA;1M1I2M1D2M;1=1I2=1D2=;false;match/insertion/match/match/deletion/match/match/ \ No newline at end of file diff --git a/cudaaligner/data/result_ApproximateBandedMyers.txt b/cudaaligner/data/result_ApproximateBandedMyers.txt new file mode 100644 index 000000000..33491b371 --- /dev/null +++ b/cudaaligner/data/result_ApproximateBandedMyers.txt @@ -0,0 +1 @@ +test_case_1 23 diff --git a/cudaaligner/data/result_NeedlemanWunschImplementation.txt b/cudaaligner/data/result_NeedlemanWunschImplementation.txt new file mode 100644 index 000000000..93cb29d6d --- /dev/null +++ b/cudaaligner/data/result_NeedlemanWunschImplementation.txt @@ -0,0 +1,6 @@ +test_case_1 0 +test_case_2 3 +test_case_3 2 +test_case_4 0 +test_case_5 4 +test_case_6 4 \ No newline at end of file diff --git a/cudaaligner/data/target_AlignerGlobal.fasta b/cudaaligner/data/target_AlignerGlobal.fasta new file mode 100644 index 000000000..38e920ccb --- /dev/null +++ b/cudaaligner/data/target_AlignerGlobal.fasta @@ -0,0 +1,70 @@ +>test_case_1 +AAAA +>test_case_2 +ATAAAAAAAA +>test_case_3 +AAAAAAAAA +>test_case_4 +ACTGA +>test_case_5 +ACTGA +>test_case_5 +ACTG +>test_case_5 +A +>test_case_6 +AAAA +>test_case_6 +ATAAAAAAAA +>test_case_6 +AAAAAAAAA +>test_case_6 +ACTGA +>test_case_6 +ACTGA +>test_case_6 +ACTG +>test_case_6 +A +>test_case_6 +AAAA +>test_case_6 +ATAAAAAAAA +>test_case_6 +AAAAAAAAA +>test_case_6 +ACTGA +>test_case_6 +ACTGA +>test_case_6 +ACTG +>test_case_6 +A +>test_case_6 +AAAA +>test_case_6 +ATAAAAAAAA +>test_case_6 +AAAAAAAAA +>test_case_6 +ACTGA +>test_case_6 +ACTGA +>test_case_6 +ACTG +>test_case_6 +A +>test_case_6 +AAAA +>test_case_6 +ATAAAAAAAA +>test_case_6 +AAAAAAAAA +>test_case_6 +ACTGA +>test_case_6 +ACTGA +>test_case_6 +ACTG +>test_case_6 +A \ No newline at end of file diff --git a/cudaaligner/data/target_AlignmentImpl.fasta b/cudaaligner/data/target_AlignmentImpl.fasta new file mode 100644 index 000000000..256deae49 --- /dev/null +++ b/cudaaligner/data/target_AlignmentImpl.fasta 
@@ -0,0 +1,8 @@ +>test_case_1 +TTATG +>test_case_2 +CATAA +>test_case_3 +AAGTCTAGAA +>test_case_4 +GATTCA \ No newline at end of file diff --git a/cudaaligner/data/target_ApproximateBandedMyers.txt b/cudaaligner/data/target_ApproximateBandedMyers.txt new file mode 100644 index 000000000..1981e1552 --- /dev/null +++ b/cudaaligner/data/target_ApproximateBandedMyers.txt @@ -0,0 +1,2 @@ +>test_case_1 +AGGGCGAATATCGCCTCCGCATTAAACTGTACTTCCCAGCCCCGCCAGTATTCCAGCGGGTTGAAGCCGCGTCTGCCACAGCGCAATGTTTTCTTTGCCCACGGTGACCGGTTTAGTCACTACAGTTGC diff --git a/cudaaligner/data/target_NeedlemanWunschImplementation.fasta b/cudaaligner/data/target_NeedlemanWunschImplementation.fasta new file mode 100644 index 000000000..a0f24f610 --- /dev/null +++ b/cudaaligner/data/target_NeedlemanWunschImplementation.fasta @@ -0,0 +1,12 @@ +>test_case_1 +ACTG +>test_case_2 +ACTG +>test_case_3 +ACTG +>test_case_4 +ACTG +>test_case_5 +ACTGGTCA +>test_case_6 +ACTG \ No newline at end of file diff --git a/cudaaligner/data/target_cudaaligner_cases.fasta b/cudaaligner/data/target_cudaaligner_cases.fasta new file mode 100644 index 000000000..ccfdbde1b --- /dev/null +++ b/cudaaligner/data/target_cudaaligner_cases.fasta @@ -0,0 +1,22 @@ +>read_target_0 +AAAAAAAAAA +>read_target_1 +AATAATAATA +>read_target_2 +AATAATAATA +>read_target_3 + +>read_target_4 +AATAATAATA +>read_target_5 +CGTCGTCGTC +>read_target_6 +CGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGT +>read_target_7 +CGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTC +>read_target_8 +GTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTC +>read_target_9 +GTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTC +>read_target_10 
+AGGGCGAATATCGCCTCCGCATTAAACTGTACTTCCCAGCCCCGCCAGTATTCCAGCGGGTTGAAGCCGCGTCTGCCACAGCGCAATGTTTTCTTTGCCCACGGTGACCGGTTTAGTCACTACAGTTGCGACGATGGCGCATCAGTTTACCGACAGCCCAAACATCGGTAGGTGAGGCATCTTCCCGCGCCAGCGCCACTTCCTGCCAGATCCTCACCAACGTCGCGGGAAGGCATCTGGTGCATTCTGCCTGCCAGCCAGCGGCGGATAATCTCGCCGCGCGGCGGCATCACTCATCGCCAGCATTGGCGCAATCTGCAGCGTCCCCTGCGGCGATTGACAGTGTCTCAAAATCATCTGCCAGCAGTTCATCCAGCAAGCTCTCTTGTTCAGCATTTAAAGTGCTGCGAGCCGTTAGCCCGTGTCTCGGCTGCCTTTGCTGCAACCAACGGCACTACGCGCAGGCGCAGAAAGTTACGATCGTATGAGTCATCACGATTGCTCGTCTTCAATCTGGCGTAAATCATACTGGCGCTTTCACTGCACCGGATTTTCAGCCCCCCGCGTGCGGGCGAGCAACGGGCGAATAAGCCTGCCGCGTTCGGCAAGCCTCCGAGACTTCCGCGCCCAGAAAACCGGCAGGGCCACTGCGTTTTAGCGCGTGGTTTCTCACGTTGATCATTCGAGATGTTTGCGCGGTGACCAGCACTTCACCAGGCGACAAGATTTTGAACAATGCCTGATAACGTGCCTGCCGCGCACGGGCCTCGATGCCCAGTCCTTCTTGCGCAAGTTGTACGCGTTCGACCACCAGCGGCACCTGCCACTGTTGGCGAACGTTTTCGCAATGCGTAACCAGGCATGACATTGGCACTTAAACCCGTGATGCATGAATAGCGCGCGGTGACACCGGATTTTCCGTCCCGCCACTGCACCAGCTTTGATGCAGCAGAACGGGGTGAGTCAAGCCCGCGCTAAAGGCCACCAGAATCTGGCGTGGTGAGAAGTTGTCTATTGAAGGCGTGAGTGTCATGATGGTGCGATTTTGGTAATATTGCCCGGCACGTTACCGGGCAAATGCAGCGATGACAAACCTTACTTGCTCATACCGGTTCCAACAGCCGTCGGATCGTTAAAGAAGGTGAAGCGTTTTTGCGTGTATGGATCGACACGGATGGTTTCACGCTACGTTATGGCTTTCAAGGTGCGCTGCACCGCGTCGATGTCATCAACGCTCGCGGCAGATGACGCAGACCTGGCTTCCGGTCGGCAGTTCGTTCCGGCAGGGCGAAATGAGAAAAGCTCAATCCACATATTTACCATTAAGCGCCAAATCCGCACCAGCGGTCGCACGCTTCACGGAAATGAACTCGCTGCGGCGTGAGCAAAGTCTTATCAAAGTAAAGCTTGCACTCCTGCATAATCATCACGCAATAATCTGTATAATTACGGTTTCCTGTTTAACCAACGTAACATCTCCGCTATTTGTTAACAGCGACCACTGTGGAAACCGCTCTACTGTAACGGATTATCCCCGCCATTTTTAGGACTCGTCGCGGTAAACGCCGTCGTCTGATCCCGCCATTTCGCGCCGTGAATATCCGTTTCAAAACCGGGGTAATGTTGCCCGACGGAACAAAACATCATTAATCGGTACTGTGCGGCTCTCTTTAGTCAACATTTCTCCAGGCATCAACAGCGGTACGCCCGATTTAAGGCAGGATCGCCAATACGCCGATACTCTGCAGGCCAGTTTGTTCCAGCGCAATGGTTCTACTTCGCCTTTAATTTGTCGTTGCCATGCCTGATGTGGCGTCACCGATCATCTTCCGGCAAAGTATCAGATGCCGCAACATCGTCAGGAAGATCGTGCACGAATCAGCTTATGGATCATGTTGCCAGATCGCAGATACGCATATTGCGGTGAAATCAGGATCTTCTGCATAGAGATCGGGTAGCATATTTTTGATCCGCAGGTTGAGATCGTAAAGAGCGTTTG
AATTCAAATCCTTCACGCAATATAATCCCATTTGCTTTGGTTTTATCTGATGCCAATACTAAAGAAACAGCAGGTTATAGTATAGTTTTCTGCACCACGTCCCACCACATTCGTCAAATTTTGCTGCCAGCTACCCATAGGATCCTCCTCGCTCATATTGCCCTGCTCGTCCATTACAGTGTCCAAAATAGGTGACTTTAACCGGATCGAGAAACATATGATCGGCATCCGCATCGTTAAAGCCGTGCCACTATTTCGCCAGGCGCAACGGGCCAGCATTCGACTTCATCACACCACGGCGGTTTTGCCAGATATCAGCTCAACCAAATCGGACTCTTCCCGCAGCCGCTGGACCTCTTTGAAAAATATGCAGAGCTCATTCTGCTGAACGGTTAATCTGTGTTTACTTGCTGCCCGGATTACCACGCAGCATCACCGCCGCCGCTCTCAACCAATGACAATGTGGATAACCTGGGCGGGTGGGTATGCATCATGAGCTTCGTTAAGTATCTTCGTCATACTCGCCTTTAATGTGGATCAGCAAAGCCTGCGATAACGCCGCCAGCATTTGTGGGTCGATTGCGTTTGAAGATCACTTTTTCCACAACACGCTCGCCGCTCATACCACTTTTACCACCGGTAGATCGGATGAAAATGGGTGTACATAACTTTACCAGGCCAGTCAAAGTGAATCGACGGGACATCCAGCGTCTGTTTACGATCCAGTCGGTGTTGTAGAACAAGCCATCGCCGTGGGTTAATTTTGATGCCACGCATAGACGACGTGTCTTGCGTGGTAGCAACGACCTCTTCGATACTTCATCGCGGTAAATTCTGTACGGGATCCTCACACCAAGGTCTAACGCATTACGCGTCGGTTTCGGCCAGACTGGCGCTACATCGTTCATCATCAACAGATGCGCCAGCGATTTATGACAATTGCGGTCGATCATGGCGTACTGGTGGCGCGGCGTACACCCACAATTTTGTTCGACGATCGATGTTCGTTGGTAGCAATGATATAATAACTCTGTTCCGCGCCAAAGTCGCGGTGTACTCTTCCGCGAACTTCGGTGTACAGGTGTGGTCAGAGCAACGAACGTGACGCCATTGACCGAAATAGAGACATCAGCCCATGAAGTATATTCCCGCCGAAAAAAATCATAAAACAGACAGCCAAGCCAGGCTTTTTGACAGCCGCGGTGCCGCTATGCCTGAAACTACCAAAAGTGTACTTCGCTCTTGACGTAGGTAAACAAAACTTTTGACCGAACGGCGGTGTGGTATGAAGATACGTCGGTGTACTGACATACAGGCTGGCGATATCTTCGCTGCTGGCGCATATTCAAAAAACCAGAGCGCCATCATATCCTGCACGCTGACATCCATCGTCGAGTGGGTGTTGATAAGAAAGGCATAAAGCGGGAATGTTGTTGGCGATTGTTATCGCTACATAAATCGAGACTGTACTCATCCCCGGTCAAAAATCACGCCGCAAATTCGAGGGTTATGCTCGATAAATTTCAACAAATCAACATGTTTTGTGGCCAGATAATCTGACGCCCTCGCGCCGACTCCAGTTCTTTGATGGGCTCATCTTTATAAAAGACGCCATGCGGTCCCATAATGGCAATGATGTTCATGTGTTCCTCCTGGAAAATCCTTCCTTGTCTGTATATCCGACCATTGGCGATAAAAAGACCACCGAGAAGTGACCTTTTTCAGGGCTTTTACCAATTACGCGTAACCGTAGCTCATCAGGCGCTGATAACGACGATTTTTTAAATCTTCAGTGCTTAACACGTCATGAGATCGGCCAGATCACCGCCAGCAGTTGCTCAGCGATACGCCGCCATCGGCTTCCGGGTTACGGTGAGCACCCTTTGGTGGTTCCGGGATGATGGAGTCGATCCGGTTCCAGTTCCTTTCAGACGCGGAACAATGAGTACCCATCGCTTCAGCCCCACTTCGGCGGCGCTTTGTCGGCGCTCTTCCACAGA
ATCAGACACAACGCTTCCGGCGGAATAACGGAATAGGTGCTGTATTTGCAGCATCATATTCACTTTATCGCCCACGCCAATCGCCAGCGCACTACCGCCAGAACCACGCTTCACCGATAACCGTACAAACTACCGGTACGCCGAGGCGAGACATTTCCGCGGGTTGCATTAAATGGCTCAGACTGACCACGCTCTTCTGCGCCCACGCCAGATAAGCCCGGTCGATAAGGTGATGATAAGCATCGCCAGCGTTCAGCCATTTGCTGTCAGACGCAGTGCTTTTGCGGTAACCTTCTGGCGCTGGCGCACTGCCAAGTTACGTTTTTCTTTTTGGTTTCACGACCTTTTTGACGACCAATGATCATCACCAGGACGACTCATCAGGTGGGCGATACCACCGACGATGTTCTCGCAATCTTCGCCCTGCTGGTCGCCAGCCAGTTCGTCAAATTCATCAAATCGCCAGGCAGACGCGTAATCCGAGGTATAAGGACACTGTGGATGGCGTACCAGTTGCGCAATCTGCCATGCCAGATCGGCGAAAGATTTTACGTAATCAGTTCTACGCTTTTTCACGCGAACGATGCGCTTCTTCATCGATGTTAATATCCAGTTTCTCATCCTGACGGCTAACCGCAGTCAGAGAATCGATTTTCAACTACAACTCTGCAATCAGCTGCAGTGCAAATCAAGGAAATTCCAGACTCACTAAAAATGTTCTGTATTGATAAACTCCGGATTCACCTGCTCCAGGGCTGGCTAAAAATTACCAGGATCGTTGTCGGCGATCGCTCGGAAGACACGCCACGTCGCGCCAAAACGCAACCGCGCGCTGGCATTAATCGCCTCTGATGATGAGATAACTTTGCTGATTGTCCAGAGCGGTGGGGTTTCCGGGGCAGCCAGTCAGGTTTACCAAAAGCTGGTCATCAATTTGCCTGTCGTCCGTGATTACATAAAAGCCCGCGAGCATATTTCAGGCTTCGTCAATATCCATCACTTCGCCGACAGTTATTCAGCCGCTATGATCATCGGCTGACCTGTCCACACGACGTCGGTATCGGTCTTTACCAGCAGTTTTCGAGTGGCCCAACATCAGGGCGTCAGTAAATAACATCTTCCAGCCACCGGAACGGTCATCAGCGTGCAGATACCGATACGATTGCCGCGGTGACCTGAACCGCGGCAACAACGAGCCCCACCGGCGTGACTTTACGTTCTGTCGTTGCGTATCGCGCCGACGCCGCGCCTCCGACATAACGCTCAATCTCTTTTAAATACTGGTTGATAAGTCGCCACGATCTGGGAATTTCAGGCTTCAACGTTCCGTTCCCCATCTGATACCGCCTGCTCTATGCACGGTTGGCAACTGGCGTGGGTCTTCCGTTGTTCAGCTCGCTGAAATGACACGCCGAACATATCGGCCTGACCGACGTGGCTTCCACCTTTCGCGTGTTGATCTGCCGCTTTTAACATCATAGCGAGTTCATCAGCGCGCTGCGCGATGTGTTGAGCAGATAAACACCCCGGACGCGATCAGTTTTTCCAGCACGCGACAGTTCTCGCTTTGCAGTGGTGTCCAGTTACCGGCGCAGAGAGATCGTCGGATCGCCAAGATGGCCATACCATTACCGGAGAAACTGATGATGGCCTCAATCAGGACCTTCACCGACCTTTTGATCAACACCAATACCATACCACGATTCGCCGTCATTGACGTGAAATATTAAGGCGGAGTTTATATCTGGTGGCAGGATTTTCAG diff --git a/cudaaligner/tests/CMakeLists.txt b/cudaaligner/tests/CMakeLists.txt index bcfc0c183..966f5d278 100644 --- a/cudaaligner/tests/CMakeLists.txt +++ b/cudaaligner/tests/CMakeLists.txt @@ -30,6 +30,9 @@ set(SOURCES 
Test_NeedlemanWunschImplementation.cpp ) +get_property(cudaaligner_data_include_dir GLOBAL PROPERTY cudaaligner_data_include_dir) +include_directories(${cudaaligner_data_include_dir}) + set(LIBS cudaaligner gwbase) diff --git a/cudaaligner/tests/Test_AlignerGlobal.cpp b/cudaaligner/tests/Test_AlignerGlobal.cpp index 9261a4e3b..edf4a5928 100644 --- a/cudaaligner/tests/Test_AlignerGlobal.cpp +++ b/cudaaligner/tests/Test_AlignerGlobal.cpp @@ -18,12 +18,17 @@ #include "../src/aligner_global_myers.hpp" #include "../src/aligner_global_myers_banded.hpp" #include "../src/aligner_global_hirschberg_myers.hpp" +#include "cudaaligner_file_location.hpp" #include #include #include +#include +#include #include +#include +#include #include "gtest/gtest.h" namespace claraparabricks @@ -73,59 +78,34 @@ struct AlignerTestData std::vector create_aligner_test_cases() { std::vector test_cases; - AlignerTestData data; - - // Test case 1 - data.inputs = {{"AAAA", "TTAT"}}; - data.cigars = {"4M"}; - data.edit_dist = {3}; - data.algorithm = AlignmentAlgorithm::Default; - test_cases.push_back(data); - - // Test case 2 - data.inputs = {{"ATAAAAAAAA", "AAAAAAAAA"}}; - data.cigars = {"1M1D8M"}; - data.edit_dist = {1}; - data.algorithm = AlignmentAlgorithm::Default; - test_cases.push_back(data); + std::map map_test_case; - // Test case 3 - data.inputs = {{"AAAAAAAAA", "ATAAAAAAAA"}}; - data.cigars = {"1M1I8M"}; - data.edit_dist = {1}; - data.algorithm = AlignmentAlgorithm::Default; - test_cases.push_back(data); + std::unique_ptr target_parser = claraparabricks::genomeworks::io::create_kseq_fasta_parser(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/target_AlignerGlobal.fasta", 0, false); + std::unique_ptr query_parser = claraparabricks::genomeworks::io::create_kseq_fasta_parser(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/query_AlignerGlobal.fasta", 0, false); - // Test case 4 - data.inputs = {{"ACTGA", "GCTAG"}}; - data.cigars = {"3M1D1M1I"}; - data.edit_dist = {3}; - data.algorithm = 
AlignmentAlgorithm::Default; - test_cases.push_back(data); + assert(target_parser->get_num_seqences() == query_parser->get_num_seqences()); + for (claraparabricks::genomeworks::read_id_t read = 0; read < target_parser->get_num_seqences(); read++) + { + assert(target_parser->get_sequence_by_id(read).name == query_parser->get_sequence_by_id(read).name); + map_test_case[target_parser->get_sequence_by_id(read).name].inputs.push_back({target_parser->get_sequence_by_id(read).seq, query_parser->get_sequence_by_id(read).seq}); + map_test_case[target_parser->get_sequence_by_id(read).name].algorithm = AlignmentAlgorithm::Default; + } - // Test case 5 - data.inputs = {{"ACTGA", "GCTAG"}, {"ACTG", "ACTG"}, {"A", "T"}}; - data.cigars = {"3M1D1M1I", "4M", "1M"}; - data.edit_dist = {3, 0, 1}; - data.algorithm = AlignmentAlgorithm::Default; - test_cases.push_back(data); + std::ifstream cigar_dist_file(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/result_AlignerGlobal.fasta"); + std::string test_case_id, cigar; + int edit_dist; + while (cigar_dist_file >> test_case_id >> cigar >> edit_dist) + { + map_test_case[test_case_id].cigars.push_back(cigar); + map_test_case[test_case_id].edit_dist.push_back(edit_dist); + } - // Test case 6 - data.inputs = { - {"AAAA", "TTAT"}, {"ATAAAAAAAA", "AAAAAAAAA"}, {"AAAAAAAAA", "ATAAAAAAAA"}, {"ACTGA", "GCTAG"}, {"ACTGA", "GCTAG"}, {"ACTG", "ACTG"}, {"A", "T"}, {"AAAA", "TTAT"}, {"ATAAAAAAAA", "AAAAAAAAA"}, {"AAAAAAAAA", "ATAAAAAAAA"}, {"ACTGA", "GCTAG"}, {"ACTGA", "GCTAG"}, {"ACTG", "ACTG"}, {"A", "T"}, {"AAAA", "TTAT"}, {"ATAAAAAAAA", "AAAAAAAAA"}, {"AAAAAAAAA", "ATAAAAAAAA"}, {"ACTGA", "GCTAG"}, {"ACTGA", "GCTAG"}, {"ACTG", "ACTG"}, {"A", "T"}, {"AAAA", "TTAT"}, {"ATAAAAAAAA", "AAAAAAAAA"}, {"AAAAAAAAA", "ATAAAAAAAA"}, {"ACTGA", "GCTAG"}, {"ACTGA", "GCTAG"}, {"ACTG", "ACTG"}, {"A", "T"}}; - data.cigars = { - "4M", "1M1D8M", "1M1I8M", "3M1D1M1I", "3M1D1M1I", "4M", "1M", - "4M", "1M1D8M", "1M1I8M", "3M1D1M1I", "3M1D1M1I", "4M", "1M", - "4M", 
"1M1D8M", "1M1I8M", "3M1D1M1I", "3M1D1M1I", "4M", "1M", - "4M", "1M1D8M", "1M1I8M", "3M1D1M1I", "3M1D1M1I", "4M", "1M"}; - data.edit_dist = { - 3, 1, 1, 3, 3, 0, 1, - 3, 1, 1, 3, 3, 0, 1, - 3, 1, 1, 3, 3, 0, 1, - 3, 1, 1, 3, 3, 0, 1}; - data.algorithm = AlignmentAlgorithm::Default; - test_cases.push_back(data); + for (const auto& test_case : map_test_case) + { + test_cases.push_back(test_case.second); + } + AlignerTestData data; std::minstd_rand rng(1); data.inputs = {{genomeworks::genomeutils::generate_random_genome(4800, rng), genomeworks::genomeutils::generate_random_genome(5000, rng)}}; data.cigars = {}; // do not test cigars diff --git a/cudaaligner/tests/Test_AlignmentImpl.cpp b/cudaaligner/tests/Test_AlignmentImpl.cpp index 830d72539..797ff0fd1 100644 --- a/cudaaligner/tests/Test_AlignmentImpl.cpp +++ b/cudaaligner/tests/Test_AlignmentImpl.cpp @@ -15,11 +15,14 @@ */ #include "../src/alignment_impl.hpp" +#include "cudaaligner_file_location.hpp" #include +#include #include "gtest/gtest.h" #include +#include namespace claraparabricks { @@ -63,81 +66,61 @@ typedef struct AlignmentTestData std::string cigar_extended; } AlignmentTestData; +AlignmentState get_alignment_state(const std::string s) +{ + if (s == "match") + return AlignmentState::match; + if (s == "mismatch") + return AlignmentState::mismatch; + if (s == "insertion") + return AlignmentState::insertion; + if (s == "deletion") + return AlignmentState::deletion; + assert(false); + return AlignmentState(0); +} + std::vector create_alignment_test_cases() { std::vector test_cases; AlignmentTestData data; - // Test case 1 - data.query = "AAAA"; - data.target = "TTATG"; - data.alignment = { - AlignmentState::mismatch, - AlignmentState::mismatch, - AlignmentState::match, - AlignmentState::mismatch, - AlignmentState::insertion}; - data.is_optimal = true; - data.formatted_alignment = FormattedAlignment{"AAAA-", "xx|x ", "TTATG"}; - data.cigar_basic = "4M1I"; - data.cigar_extended = "2X1=1X1I"; - 
test_cases.push_back(data); - - // Test case 2 - data.query = "CGATAATG"; - data.target = "CATAA"; - data.alignment = { - AlignmentState::deletion, - AlignmentState::mismatch, - AlignmentState::match, - AlignmentState::match, - AlignmentState::match, - AlignmentState::match, - AlignmentState::deletion, - AlignmentState::deletion}; - data.is_optimal = true; - data.formatted_alignment = FormattedAlignment{"CGATAATG", " x|||| ", "-CATAA--"}; - data.cigar_basic = "1D5M2D"; - data.cigar_extended = "1D1X4=2D"; - test_cases.push_back(data); - - // Test case 3 - data.query = "GTTAG"; - data.target = "AAGTCTAGAA"; - data.alignment = { - AlignmentState::insertion, - AlignmentState::insertion, - AlignmentState::match, - AlignmentState::match, - AlignmentState::insertion, - AlignmentState::match, - AlignmentState::match, - AlignmentState::match, - AlignmentState::insertion, - AlignmentState::insertion, - }; - data.is_optimal = true; - data.formatted_alignment = FormattedAlignment{"--GT-TAG--", " || ||| ", "AAGTCTAGAA"}; - data.cigar_basic = "2I2M1I3M2I"; - data.cigar_extended = "2I2=1I3=2I"; - test_cases.push_back(data); - - // Test case 4 - data.query = "GTTACA"; - data.target = "GATTCA"; - data.alignment = { - AlignmentState::match, - AlignmentState::insertion, - AlignmentState::match, - AlignmentState::match, - AlignmentState::deletion, - AlignmentState::match, - AlignmentState::match}; - data.is_optimal = false; // this example is optimal, but is_optimal = false does only mean it is an upper bound - data.formatted_alignment = FormattedAlignment{"G-TTACA", "| || ||", "GATT-CA"}; - data.cigar_basic = "1M1I2M1D2M"; - data.cigar_extended = "1=1I2=1D2="; - test_cases.push_back(data); + std::unique_ptr target_parser = claraparabricks::genomeworks::io::create_kseq_fasta_parser(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/target_AlignmentImpl.fasta", 0, false); + std::unique_ptr query_parser = 
claraparabricks::genomeworks::io::create_kseq_fasta_parser(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/query_AlignmentImpl.fasta", 0, false); + std::ifstream result_file(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/result_AlignmentImpl.txt"); + std::string result_line; + read_id_t read_id = 0; + + while (getline(result_file, result_line)) + { + data = {}; + std::stringstream linestream(result_line); + std::string test_case, formatted_field_1, formatted_field_2, formatted_field_3, optimal, alignment_field, alignment_state; + while (getline(linestream, test_case, ';')) + { + assert(query_parser->get_sequence_by_id(read_id).name == test_case); + assert(target_parser->get_sequence_by_id(read_id).name == test_case); + data.query = query_parser->get_sequence_by_id(read_id).seq; + data.target = target_parser->get_sequence_by_id(read_id).seq; + getline(linestream, formatted_field_1, ';'); + getline(linestream, formatted_field_2, ';'); + getline(linestream, formatted_field_3, ';'); + getline(linestream, data.cigar_basic, ';'); + getline(linestream, data.cigar_extended, ';'); + getline(linestream, optimal, ';'); + getline(linestream, alignment_field, ';'); + + data.formatted_alignment = FormattedAlignment{formatted_field_1, formatted_field_2, formatted_field_3}; + data.is_optimal = ((optimal == "true") ? 
true : false); + std::istringstream alignstream(alignment_field); + while (getline(alignstream, alignment_field, '/')) + { + data.alignment.push_back(get_alignment_state(alignment_field)); + } + } + read_id++; + test_cases.push_back(data); + } return test_cases; }; diff --git a/cudaaligner/tests/Test_ApproximateBandedMyers.cpp b/cudaaligner/tests/Test_ApproximateBandedMyers.cpp index 2492d24b9..26b49857e 100644 --- a/cudaaligner/tests/Test_ApproximateBandedMyers.cpp +++ b/cudaaligner/tests/Test_ApproximateBandedMyers.cpp @@ -15,12 +15,15 @@ */ #include "../src/aligner_global_myers_banded.hpp" +#include "cudaaligner_file_location.hpp" #include #include +#include #include #include #include +#include namespace { @@ -48,7 +51,23 @@ struct TestCase std::vector create_band_test_cases() { std::vector data; - data.push_back({"AGGGCGAATATCGCCTCCCGCATTAAGCTGTACCTTCCAGCCCCGCCGGTAATTCCAGCCGGTTGAAGCCACGTCTGCCACGGCACAATGTTTTCGCTTTGCCCGGTGACGGATTTAATCCACCACAG", "AGGGCGAATATCGCCTCCGCATTAAACTGTACTTCCCAGCCCCGCCAGTATTCCAGCGGGTTGAAGCCGCGTCTGCCACAGCGCAATGTTTTCTTTGCCCACGGTGACCGGTTTAGTCACTACAGTTGC", 23}); + + std::unique_ptr target_parser = claraparabricks::genomeworks::io::create_kseq_fasta_parser(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/target_ApproximateBandedMyers.txt", 0, false); + std::unique_ptr query_parser = claraparabricks::genomeworks::io::create_kseq_fasta_parser(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/query_ApproximateBandedMyers.txt", 0, false); + + std::ifstream edit_dist_file(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/result_ApproximateBandedMyers.txt"); + std::string test_case; + int32_t edit_distance; + + assert(target_parser->get_num_seqences() == query_parser->get_num_seqences()); + claraparabricks::genomeworks::read_id_t read = 0; + for (; edit_dist_file >> test_case >> edit_distance; read++) // one result row per FASTA record: advance the record index with every row + { + assert(target_parser->get_sequence_by_id(read).name == test_case); + assert(query_parser->get_sequence_by_id(read).name == test_case); +
data.push_back({query_parser->get_sequence_by_id(read).seq, target_parser->get_sequence_by_id(read).seq, edit_distance}); + } + return data; } diff --git a/cudaaligner/tests/Test_NeedlemanWunschImplementation.cpp b/cudaaligner/tests/Test_NeedlemanWunschImplementation.cpp index 1e84edc5e..f2f2b5c30 100644 --- a/cudaaligner/tests/Test_NeedlemanWunschImplementation.cpp +++ b/cudaaligner/tests/Test_NeedlemanWunschImplementation.cpp @@ -18,14 +18,17 @@ #include "../src/ukkonen_cpu.hpp" #include "../src/ukkonen_gpu.cuh" #include "../src/batched_device_matrices.cuh" +#include "cudaaligner_file_location.hpp" #include #include #include +#include #include #include #include +#include #include "gtest/gtest.h" namespace claraparabricks @@ -52,43 +55,23 @@ std::vector getTestCases() std::vector test_cases; TestAlignmentPair t; - // Test 1 - t.target = "ACTG"; - t.query = "ACTG"; - t.p = 0; - test_cases.push_back(t); - - // Test 2 - t.target = "ACTG"; - t.query = "ATCG"; - t.p = 3; - test_cases.push_back(t); - - // Test 3 - t.target = "ACTG"; - t.query = "ATG"; - t.p = 2; - test_cases.push_back(t); - - // Test 4 - t.target = "ACTG"; - t.query = ""; - t.p = 0; - test_cases.push_back(t); + std::unique_ptr target_parser = claraparabricks::genomeworks::io::create_kseq_fasta_parser(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/target_NeedlemanWunschImplementation.fasta", 0, false); + std::unique_ptr query_parser = claraparabricks::genomeworks::io::create_kseq_fasta_parser(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/query_NeedlemanWunschImplementation.fasta", 0, false); - // Test 5 - t.target = "ACTGGTCA"; - t.query = "ACTG"; - t.p = 4; - test_cases.push_back(t); + std::ifstream p_file(std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/result_NeedlemanWunschImplementation.txt"); + std::string test_case; + int32_t p; - // Test 6 - t.target = "ACTG"; - t.query = "BDEF"; - t.p = 4; - test_cases.push_back(t); + assert(target_parser->get_num_seqences() == 
query_parser->get_num_seqences()); + claraparabricks::genomeworks::read_id_t read = 0; + while (p_file >> test_case >> p) + { + assert(target_parser->get_sequence_by_id(read).name == test_case); + assert(query_parser->get_sequence_by_id(read).name == test_case); + test_cases.push_back({.target = target_parser->get_sequence_by_id(read).seq, .query = query_parser->get_sequence_by_id(read).seq, .p = p}); + } - // Test 7 + // Randomly-generated test cases std::minstd_rand rng(1); t.target = genomeworks::genomeutils::generate_random_genome(5000, rng); t.query = genomeworks::genomeutils::generate_random_genome(4800, rng); diff --git a/cudaaligner/tests/cudaaligner_test_cases.cpp b/cudaaligner/tests/cudaaligner_test_cases.cpp index 156b12ec6..e3aeea450 100644 --- a/cudaaligner/tests/cudaaligner_test_cases.cpp +++ b/cudaaligner/tests/cudaaligner_test_cases.cpp @@ -15,8 +15,12 @@ */ #include "cudaaligner_test_cases.hpp" +#include "cudaaligner_file_location.hpp" +#include #include +#include +#include #include namespace @@ -35,6 +39,23 @@ claraparabricks::genomeworks::TestCaseData generate_random_test_case(std::minstd t.query = claraparabricks::genomeworks::genomeutils::generate_random_sequence(t.target, rng, get_size(t.target), get_size(t.target), get_size(t.target)); return t; } + +std::vector load_test_case(const std::string& target, const std::string& query) +{ + std::vector tests; + + std::unique_ptr target_parser = claraparabricks::genomeworks::io::create_kseq_fasta_parser(target, 0, false); + std::unique_ptr query_parser = claraparabricks::genomeworks::io::create_kseq_fasta_parser(query, 0, false); + + assert(target_parser->get_num_seqences() == query_parser->get_num_seqences()); + + for (claraparabricks::genomeworks::read_id_t read = 0; read < target_parser->get_num_seqences(); read++) + { + tests.push_back({.target = target_parser->get_sequence_by_id(read).seq, .query = query_parser->get_sequence_by_id(read).seq}); + } + + return tests; +} } // namespace namespace 
claraparabricks @@ -44,53 +65,9 @@ namespace genomeworks { std::vector create_cudaaligner_test_cases() { - std::vector tests; - - TestCaseData t; - - t.target = "AAAAAAAAAA"; - t.query = "CGTCGTCGTC"; - tests.push_back(t); - - t.target = "AATAATAATA"; - t.query = "CGTCGTCGTC"; - tests.push_back(t); - - t.target = "AATAATAATA"; - t.query = ""; - tests.push_back(t); - - t.target = ""; - t.query = "CGTCGTCGTC"; - tests.push_back(t); - - t.target = "AATAATAATA"; - t.query = "C"; - tests.push_back(t); - - t.target = "CGTCGTCGTC"; - t.query = "CGTCGTCGTC"; - tests.push_back(t); - - t.target = "CGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGT"; - t.query = "AGTCGTCGTCCGTAATCGTCCGTCGTCGTCGA"; - tests.push_back(t); - - t.target = "CGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTC"; - t.query = "AGTCGTCGTCCGTAATCGTCCGTCGTCGTCGTA"; - tests.push_back(t); - - t.target = "GTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTC"; - t.query = "GTCGTCGTCCGTCGTCGTCCGTCGTCGTCGAAAACGTCGTCCGTCGTCGTCCGTCGTCGAAAACGTCGTCGTCCGTAGTCGTCCGACGTCGTCGTC"; - tests.push_back(t); - - t.target = "GTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTC"; - t.query = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; - tests.push_back(t); - - t.target = 
"AGGGCGAATATCGCCTCCGCATTAAACTGTACTTCCCAGCCCCGCCAGTATTCCAGCGGGTTGAAGCCGCGTCTGCCACAGCGCAATGTTTTCTTTGCCCACGGTGACCGGTTTAGTCACTACAGTTGCGACGATGGCGCATCAGTTTACCGACAGCCCAAACATCGGTAGGTGAGGCATCTTCCCGCGCCAGCGCCACTTCCTGCCAGATCCTCACCAACGTCGCGGGAAGGCATCTGGTGCATTCTGCCTGCCAGCCAGCGGCGGATAATCTCGCCGCGCGGCGGCATCACTCATCGCCAGCATTGGCGCAATCTGCAGCGTCCCCTGCGGCGATTGACAGTGTCTCAAAATCATCTGCCAGCAGTTCATCCAGCAAGCTCTCTTGTTCAGCATTTAAAGTGCTGCGAGCCGTTAGCCCGTGTCTCGGCTGCCTTTGCTGCAACCAACGGCACTACGCGCAGGCGCAGAAAGTTACGATCGTATGAGTCATCACGATTGCTCGTCTTCAATCTGGCGTAAATCATACTGGCGCTTTCACTGCACCGGATTTTCAGCCCCCCGCGTGCGGGCGAGCAACGGGCGAATAAGCCTGCCGCGTTCGGCAAGCCTCCGAGACTTCCGCGCCCAGAAAACCGGCAGGGCCACTGCGTTTTAGCGCGTGGTTTCTCACGTTGATCATTCGAGATGTTTGCGCGGTGACCAGCACTTCACCAGGCGACAAGATTTTGAACAATGCCTGATAACGTGCCTGCCGCGCACGGGCCTCGATGCCCAGTCCTTCTTGCGCAAGTTGTACGCGTTCGACCACCAGCGGCACCTGCCACTGTTGGCGAACGTTTTCGCAATGCGTAACCAGGCATGACATTGGCACTTAAACCCGTGATGCATGAATAGCGCGCGGTGACACCGGATTTTCCGTCCCGCCACTGCACCAGCTTTGATGCAGCAGAACGGGGTGAGTCAAGCCCGCGCTAAAGGCCACCAGAATCTGGCGTGGTGAGAAGTTGTCTATTGAAGGCGTGAGTGTCATGATGGTGCGATTTTGGTAATATTGCCCGGCACGTTACCGGGCAAATGCAGCGATGACAAACCTTACTTGCTCATACCGGTTCCAACAGCCGTCGGATCGTTAAAGAAGGTGAAGCGTTTTTGCGTGTATGGATCGACACGGATGGTTTCACGCTACGTTATGGCTTTCAAGGTGCGCTGCACCGCGTCGATGTCATCAACGCTCGCGGCAGATGACGCAGACCTGGCTTCCGGTCGGCAGTTCGTTCCGGCAGGGCGAAATGAGAAAAGCTCAATCCACATATTTACCATTAAGCGCCAAATCCGCACCAGCGGTCGCACGCTTCACGGAAATGAACTCGCTGCGGCGTGAGCAAAGTCTTATCAAAGTAAAGCTTGCACTCCTGCATAATCATCACGCAATAATCTGTATAATTACGGTTTCCTGTTTAACCAACGTAACATCTCCGCTATTTGTTAACAGCGACCACTGTGGAAACCGCTCTACTGTAACGGATTATCCCCGCCATTTTTAGGACTCGTCGCGGTAAACGCCGTCGTCTGATCCCGCCATTTCGCGCCGTGAATATCCGTTTCAAAACCGGGGTAATGTTGCCCGACGGAACAAAACATCATTAATCGGTACTGTGCGGCTCTCTTTAGTCAACATTTCTCCAGGCATCAACAGCGGTACGCCCGATTTAAGGCAGGATCGCCAATACGCCGATACTCTGCAGGCCAGTTTGTTCCAGCGCAATGGTTCTACTTCGCCTTTAATTTGTCGTTGCCATGCCTGATGTGGCGTCACCGATCATCTTCCGGCAAAGTATCAGATGCCGCAACATCGTCAGGAAGATCGTGCACGAATCAGCTTATGGATCATGTTGCCAGATCGCAGATACGCATATTGCGGTGAAATCAGGATCTTCTGCATAGAGATCGGGTAGCATATTTTTGATCCGCAGGTTGAGATCGTAAAGAGCGTTTG
AATTCAAATCCTTCACGCAATATAATCCCATTTGCTTTGGTTTTATCTGATGCCAATACTAAAGAAACAGCAGGTTATAGTATAGTTTTCTGCACCACGTCCCACCACATTCGTCAAATTTTGCTGCCAGCTACCCATAGGATCCTCCTCGCTCATATTGCCCTGCTCGTCCATTACAGTGTCCAAAATAGGTGACTTTAACCGGATCGAGAAACATATGATCGGCATCCGCATCGTTAAAGCCGTGCCACTATTTCGCCAGGCGCAACGGGCCAGCATTCGACTTCATCACACCACGGCGGTTTTGCCAGATATCAGCTCAACCAAATCGGACTCTTCCCGCAGCCGCTGGACCTCTTTGAAAAATATGCAGAGCTCATTCTGCTGAACGGTTAATCTGTGTTTACTTGCTGCCCGGATTACCACGCAGCATCACCGCCGCCGCTCTCAACCAATGACAATGTGGATAACCTGGGCGGGTGGGTATGCATCATGAGCTTCGTTAAGTATCTTCGTCATACTCGCCTTTAATGTGGATCAGCAAAGCCTGCGATAACGCCGCCAGCATTTGTGGGTCGATTGCGTTTGAAGATCACTTTTTCCACAACACGCTCGCCGCTCATACCACTTTTACCACCGGTAGATCGGATGAAAATGGGTGTACATAACTTTACCAGGCCAGTCAAAGTGAATCGACGGGACATCCAGCGTCTGTTTACGATCCAGTCGGTGTTGTAGAACAAGCCATCGCCGTGGGTTAATTTTGATGCCACGCATAGACGACGTGTCTTGCGTGGTAGCAACGACCTCTTCGATACTTCATCGCGGTAAATTCTGTACGGGATCCTCACACCAAGGTCTAACGCATTACGCGTCGGTTTCGGCCAGACTGGCGCTACATCGTTCATCATCAACAGATGCGCCAGCGATTTATGACAATTGCGGTCGATCATGGCGTACTGGTGGCGCGGCGTACACCCACAATTTTGTTCGACGATCGATGTTCGTTGGTAGCAATGATATAATAACTCTGTTCCGCGCCAAAGTCGCGGTGTACTCTTCCGCGAACTTCGGTGTACAGGTGTGGTCAGAGCAACGAACGTGACGCCATTGACCGAAATAGAGACATCAGCCCATGAAGTATATTCCCGCCGAAAAAAATCATAAAACAGACAGCCAAGCCAGGCTTTTTGACAGCCGCGGTGCCGCTATGCCTGAAACTACCAAAAGTGTACTTCGCTCTTGACGTAGGTAAACAAAACTTTTGACCGAACGGCGGTGTGGTATGAAGATACGTCGGTGTACTGACATACAGGCTGGCGATATCTTCGCTGCTGGCGCATATTCAAAAAACCAGAGCGCCATCATATCCTGCACGCTGACATCCATCGTCGAGTGGGTGTTGATAAGAAAGGCATAAAGCGGGAATGTTGTTGGCGATTGTTATCGCTACATAAATCGAGACTGTACTCATCCCCGGTCAAAAATCACGCCGCAAATTCGAGGGTTATGCTCGATAAATTTCAACAAATCAACATGTTTTGTGGCCAGATAATCTGACGCCCTCGCGCCGACTCCAGTTCTTTGATGGGCTCATCTTTATAAAAGACGCCATGCGGTCCCATAATGGCAATGATGTTCATGTGTTCCTCCTGGAAAATCCTTCCTTGTCTGTATATCCGACCATTGGCGATAAAAAGACCACCGAGAAGTGACCTTTTTCAGGGCTTTTACCAATTACGCGTAACCGTAGCTCATCAGGCGCTGATAACGACGATTTTTTAAATCTTCAGTGCTTAACACGTCATGAGATCGGCCAGATCACCGCCAGCAGTTGCTCAGCGATACGCCGCCATCGGCTTCCGGGTTACGGTGAGCACCCTTTGGTGGTTCCGGGATGATGGAGTCGATCCGGTTCCAGTTCCTTTCAGACGCGGAACAATGAGTACCCATCGCTTCAGCCCCACTTCGGCGGCGCTTTGTCGGCGCTCTTCCACAGA
ATCAGACACAACGCTTCCGGCGGAATAACGGAATAGGTGCTGTATTTGCAGCATCATATTCACTTTATCGCCCACGCCAATCGCCAGCGCACTACCGCCAGAACCACGCTTCACCGATAACCGTACAAACTACCGGTACGCCGAGGCGAGACATTTCCGCGGGTTGCATTAAATGGCTCAGACTGACCACGCTCTTCTGCGCCCACGCCAGATAAGCCCGGTCGATAAGGTGATGATAAGCATCGCCAGCGTTCAGCCATTTGCTGTCAGACGCAGTGCTTTTGCGGTAACCTTCTGGCGCTGGCGCACTGCCAAGTTACGTTTTTCTTTTTGGTTTCACGACCTTTTTGACGACCAATGATCATCACCAGGACGACTCATCAGGTGGGCGATACCACCGACGATGTTCTCGCAATCTTCGCCCTGCTGGTCGCCAGCCAGTTCGTCAAATTCATCAAATCGCCAGGCAGACGCGTAATCCGAGGTATAAGGACACTGTGGATGGCGTACCAGTTGCGCAATCTGCCATGCCAGATCGGCGAAAGATTTTACGTAATCAGTTCTACGCTTTTTCACGCGAACGATGCGCTTCTTCATCGATGTTAATATCCAGTTTCTCATCCTGACGGCTAACCGCAGTCAGAGAATCGATTTTCAACTACAACTCTGCAATCAGCTGCAGTGCAAATCAAGGAAATTCCAGACTCACTAAAAATGTTCTGTATTGATAAACTCCGGATTCACCTGCTCCAGGGCTGGCTAAAAATTACCAGGATCGTTGTCGGCGATCGCTCGGAAGACACGCCACGTCGCGCCAAAACGCAACCGCGCGCTGGCATTAATCGCCTCTGATGATGAGATAACTTTGCTGATTGTCCAGAGCGGTGGGGTTTCCGGGGCAGCCAGTCAGGTTTACCAAAAGCTGGTCATCAATTTGCCTGTCGTCCGTGATTACATAAAAGCCCGCGAGCATATTTCAGGCTTCGTCAATATCCATCACTTCGCCGACAGTTATTCAGCCGCTATGATCATCGGCTGACCTGTCCACACGACGTCGGTATCGGTCTTTACCAGCAGTTTTCGAGTGGCCCAACATCAGGGCGTCAGTAAATAACATCTTCCAGCCACCGGAACGGTCATCAGCGTGCAGATACCGATACGATTGCCGCGGTGACCTGAACCGCGGCAACAACGAGCCCCACCGGCGTGACTTTACGTTCTGTCGTTGCGTATCGCGCCGACGCCGCGCCTCCGACATAACGCTCAATCTCTTTTAAATACTGGTTGATAAGTCGCCACGATCTGGGAATTTCAGGCTTCAACGTTCCGTTCCCCATCTGATACCGCCTGCTCTATGCACGGTTGGCAACTGGCGTGGGTCTTCCGTTGTTCAGCTCGCTGAAATGACACGCCGAACATATCGGCCTGACCGACGTGGCTTCCACCTTTCGCGTGTTGATCTGCCGCTTTTAACATCATAGCGAGTTCATCAGCGCGCTGCGCGATGTGTTGAGCAGATAAACACCCCGGACGCGATCAGTTTTTCCAGCACGCGACAGTTCTCGCTTTGCAGTGGTGTCCAGTTACCGGCGCAGAGAGATCGTCGGATCGCCAAGATGGCCATACCATTACCGGAGAAACTGATGATGGCCTCAATCAGGACCTTCACCGACCTTTTGATCAACACCAATACCATACCACGATTCGCCGTCATTGACGTGAAATATTAAGGCGGAGTTTATATCTGGTGGCAGGATTTTCAG"; - t.query = 
"AGGGCGAATATCGCCTCCCGCATTAAGCTGTACCTTCCAGCCCCGCCGGTAATTCCAGCCGGTTGAAGCCACGTCTGCCACGGCACAATGTTTTCGCTTTGCCCGGTGACGGATTTAATCCACCACAGTTGCGACTGATAGCGTCCGGATTTCAAACGCGCCCAAACGTAAACAGGGTGAGGCATCTTCCCGCGCCAGCGCACTTCCTGCCAGATCCTCACCAACGCGTCGCGGAAAGGCATCTAAATTGCATTCTGCCCTGCCAGCCAGCGGCGGATAATCGCCGCGCGGCGGGCATCACTGTATCGCCAGCATTGGCACAATCTGCGGCGTCCCCTGCGGCGATTGACGGTGTGCTAAATCATCTGCCAACCTCAGTTCATCCAGCAGGCTCTCTTGTTCAGCACAAAGTGCGGCGCTGCGAGCCGTTGCTTCGGCAAAATGCGGCCAACGCTGCTGCCAATAACGGCACTACGCGCAGGCGCAGAAAGTTACGATCGTATGAGTCGTCCTGATTACTTTCGTCTTCAATCCAGCGTAAATCCCATACTGACGCGCCCTGCACCAGTTCCCCGCGTGCGGGCGAGCAACGGGCGAATAAGCCGCGTTCCGGCAAAACTCCGAGACTTCCGCCATAGCCGAAAGCCCGGCAGGGCCACTGCCGCGTTTAGCGCCAGCAGAAAGGTTTCACATTGATCGTCGAGATGTTGCGCGGTGACCAGCACTTCACCGGGCAACAAGGTGCGGGCAAATGCCTGATAACGTGCCTGCCGCGCCTGGGCCTCAATGCCCAGTCCTTCTTGCGCAAGTTGTACGCGTTCGACCACCAGCGGCACCACTGTTGGCAGACGTTTTCGCAATGCGTAACCCAGGCATCGGCATTGGCACTTAAGCCGTGATGCACATGAATAGCGCGCAGAGCGACACCGGATTTTCCGTCGCACTGCACCAACTGATGCAGCAGAACGGTGGAGCCAAGCCCGCCGCTAAAGGCCACCAGAATCTGGCGTGAGGTGAGAAGTTGTCTATTGAGCGTGAGTGTCATGATGGTGCGATTTTACAGTAGCAATGCCCGGCACGTTACCGGGCAAATGCGGCGATGACAAACCTTACTGCTCCATACAGTTCAGCAGCCCGTCCGGATCGTTAAAGAAGGTGAAGCGTTTTTTTGCGTGTATGGATCGACACGGATGGTTTCACTTCACGTTATGGCTTTCAAAGGTGCGCCACTGCCGCATCGATGTCATCAACGCTAAAAGCCAGATGACGCAGACCGCAAGCTTCCGGTCGGCTGGGTCGTTCCGGCCAGGAACGGAAATGAGAAAACTCAATCACATATTGCCCATTAAGCGCCGATCCCCTTTCCATGAGTCGCGCGCTTCGCGATAGACTTCGCTGCAGCGTGAAACCGAATATCGCGGTGAAAGCTTTTGCTCACCGCATAATCCGTCGCAATAATCGCAATATGGTGAACCTGTTTTAAACCCAGCATAGCGTCTCCTTTGTTGTTAACAGCACGTTACTCGCCCGGAAGCCGCTCTGGCAAGTTATCCCGCCATTTTTAGGACTCGTACGCGGTAAACGCCGTCTTCGTCCTGTTTCGCGCCGTGAATATCCGTTTCAAAACCGGGGTAATGTTGCCCGACGGAACAAAGCATCGTGAAAGAAATCGAGTATAATGCGCTCTCTTTGGTCAGCATTTCTCCAGGCATCAACAGCGGTACGCCCGGTGGATAAGGCAGGATCATATTGCCGGAAAATCTCTACCGACCAGTTGTTCCAGCGCAATGGTTTCTACTTCGCCTTTATTTTGTCGTTGCCATGCCTGATGTGGCGTCATGATCATCTCCGGCAAAGTATCGAATGCCCGCAACATCAAACCGGGAAGATCGTGTTTACGAATCAGCTTATGGATCCCTTGTGCCAGATCCTGAATACGCATATTGCGGTAGAAATCGGGATCTTCTGCATAGAGATCGGGTAGCATATTTTTGATCCG
CAGGTTGAGATCGTAAGAGCGTTTGAATTCCGTCAACCCACGCAATAATCCCATTGCTTTGGTTTTATCGATGCCAATACTAAAGAGAAACAGCAGGTTATAAGGGCCGGTTTTCTCTACTACGATCCCACGTTCGTCGAGGAATTTTGCTACCAGCGCCGCCGGGATCCCCTCCTCGCTCATATTGCCCTGCTCGTCCATCCCCGGTGTCAAAATAGTGACTTTAACCGGATCCGAGAAACATATGATCAGCATCCGCATCGTTAAAGCCGTGCCACTGTTCGCCAGGCGCAACGGGCCAGCATTCGGCTTCATCCACCTGCGGCGGTTGCCAGATATCGAAAAACCAACCGTCAGACTCTTCCCGCAGCCGCTGGACCTCTTTGCGAAAATGCAGAGCTCGTTCTACTGAACGGTTAATCAGCCGTTTGCGGATTACCACGCAGCATCGCCGCCGCCGTCTCAACCGAAGCAATGGGATAACTGGGCGAGGTGGTGGTATCATCATAAAGGCTTCGTTAAAGGCCTCTTCGTCATACTCGCCTTTGTGTGGATCCAGCGAAGCCTGCGATAACGCCGCCAGCATTTTGTGGGTCGATTGCGTTTCGAAGATCACTTTTCCCGCAACACGCTCGCCGCTCATACCACTTTTACCACCTGGTAGATCGGATGAAAATGGGTGTACGGCACCCAGGCAGAATCGAAGTGAATCGACAGGACATCCAGCGTCTGTTTGATCCAGTCGGTGTTGTAGAGCAAGCCATCATAGGTGGAGTTGGTGATCACCGCATGAACCGGCCATTGTGCTTGCGTGGTAGCAGCGACTTTCTCTTCGATGCTGTCGCGAGTAAATTCACGGCGCGGGATCCCACAAAATCCCCAACGCATTACGCGTCGGTTTTTCAGCCAGACTGGCACTACATCGTTCATCATCAACAGATGCGCCAGCGATTATGACAATTGCGGTCGATCAACAGCAGCGTACTGCCGGATGGCGCGGCGTTTATACCCACAATTTTGTTCGACGTCGATGTTCCGTTGGTAACGATATAACTCTGTTCCGCGCCAAAAGTCCGCGCGATGTACTCTTCCGCTTCCAGGTGTGGCCCGGTGTGGTCGAGCAACGAACCAAGCTCGGTGACCGAAATAGAGACATCAGCCTTAAGAGTATTCCCGCCGAAAAAATCATAAAACAGACAGCCAACCGGGCTTTTTGATATGCAGTACCGCCCATATGCCCCGGCGTACAAAGGGTGTACTTCCGCTCTTTGACGTAGGTAAACAAGCTTTCGTGAACGGCGGTGTAATGTTATCAAGATATTCGTCGGTGTACTGACGCATACGAATGGCGATATCTTCCGCCTGCCCCAGCGCATATTCAAAAAACCAGAGCGCCATCCGCATATCCTGCACGCTGACATCCATCGTCGAGTGGGTGTTGATGAAGGCATAAAGCGGGAGATATTCATTAAGCTGATTGATATCGCTACATAAATCGAGACTGTACTCATCCCAGTCAAAATCACGCCGCAAATTCGGGAGTTATACTCGATAAATTTCAGCAATCAACGCTGTTTTTGTGTGAATAATCTGAAAGCCTTGCGCCACCAGCGCCGACTCCAGTTCTTTGATGGGCTCATCTTTATAAAAGACGCCATGCGGTCCCATAATGGCAATGATGTTCATGTGTTCCTCCTGGAAAATCCTTCCTTAATCATAGCCTGCTCAAACCGTGGCGATAAAAGGGTTACCGGTGACCCTTTTTCAGAACTTTGCAATTACGCGTAACCGTAGCTCATCAGGCGCTGATAACGACGGAATTTTTTAAATCTTCAGTGCTTAACACGTCGAGGACGGCCAGATCCGCCAGCAGTTGCGCTTTCAACGATGCCGCCATCGCTTCCGGGTTACGGTGAGCACCACCCAGTGGTTCCGGGATGATGGAGTCGATCAGTTTCAGTTCTTTCAGACGCGGAGCAATGATACCCATCCTTCAGCCG
CCAGCGCACAGCCGGCACTCTTCCACAGAATGGACGCACAACCTTCCGGCGAGATAACCGGAATAGGTGCTGTATTGCAGCATATTCGCTTTATCGCCACGCCAATCGCCAGCGCACCGCCAGAACCACCTTCACCGATAACCGTACAAACTACCGGTACGCCGGGCGAGACATTTCACGCAGGTTGCGTGCAATGGCTTCCAGACTGACCACGCTCTTCTGCGCCCACGCGGGGATAAGCCCCGGGGTGTCGGTAAAGGTGATGATAGGCATCTTAAAGCGTTCAGCCATTTGCATCCAGACGCAGTGCTTTGCGGTAACCTTCTGGCGCTGGCATACCAAAGTTACGGCGAATTTTTTCTTTGGTTTCGACCTTTTGATGATAATGATCATCACCGGACGACCATCGAGACGGGCGATACCACCGACGATAGCTTTATCGTCTGCATACGCGCGGTCGCCAGCCAGTTCGTCAAATTCATCAAATGCCAGGCGAACGTAATCCAGGGTATAAGGACGCGCTGTGGATGGCGTGCCAGTTGCGCAATCTGCCATGCACCGAGATCGGCGAAGATTTGTTTGCGGTGTCAGTTCTACGCTTTTTTCACGCAGACGATGCACTTCTTCATCGATGTTAATATCAGTTTCTCATCCTGACGGCTAACCTGCCAGTCAGAGAATCATTTTCGCTTCCAGCTCTGCAATATTGTTCAAAATCAGGAAATTCAGACTCATAGTATTCCTGTATTAGTCAAACTCCAGTTCCACCTGCTCCGAACCAATGAGGCCACGGAGATCGTTTAATAAACGATCGCTCGGAGAGACACGCCACGTCGCGCCAAAACGCAACCGCGCGCATGCATCCGCCCTCTGATAGTAGGAATGTACTGGAATTGTCCCAGAGCGGTGGGGTTCCAGAGACTGACGGAGTCGGTTTAAAGCTGGTCATCAATTTGCCTGTCCGTCAGCGAGATAGCAAGCCCGCGAGCATATTTTTCCCGGGCTTCGTCAATATCCATCACTTCGCGAGCGGTCATTTTAAGCCCACCGCTGAAGTCATCAAAGCTGACCTGTCCGCTGACGATAAGTATGCAGTCTTTCCAGCAATTGCTGGTATTTATCCAGGGGCGTCAGTAAGCAACATCACTTCCAGCCGCCGGAACGGTCATCCAGCGTGCAGATACCGATACGATTGCCGCGCTTGGTGACCATAACCCGCGCGGCAACAACTTAACCCCGCAGCCGTGATGACTTTACCACGTTCTGTCGGGTGCATGTCTTTCAGCCGCCGCCTCCGACATAACGCTCAATCTCTTTTAAATACTGGTTGATAGGGTGTCCGGTCAGGTACAGGCCTAACGTTTCACGTTCCCCATCTAATACCACCTGCTCCGGCCACGGTTGGCAGCTGGCGTAGGATTGTTCAATTTGTTCCGGCTCTTCGGCCAGCACGCCGAACATATCGGCCTGACCGATAGCTTCCGCTTTCGCGTGTTGATCTGCCGCTTTTAACGCATCGCCCAGCGAGTTCATCAGCGCTGCGCGATGTGGCCAAGACGGTCAAACGCCCCGGACATGATCAGTTTTTCCAGCACGCGACGGTTCAACTTTTTGGTGTCGGTACGGGCGCAGAGATCAAACAGTTCGCGGAAGTAGCCGCCTTTATTACGGGCTTCGATGATGGCCTCAATCGGACCTTCACCGACCCCGATCGCGCCAATACCATACACGATTTCGCCGTCGTCGTTGACGTGGAAATGGTAAAGACCGGAGTTTATATCTGGTGGCAGGATTTTCAG"; - tests.push_back(t); + std::vector tests = load_test_case( + std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + "/target_cudaaligner_cases.fasta", + std::string(CUDAALIGNER_BENCHMARK_DATA_DIR) + 
"/query_cudaaligner_cases.fasta"); std::minstd_rand rng(random_seed); for (int32_t i = 0; i < n_random_testcases; ++i)