Skip to content

Commit

Permalink
update EasyComplexSearch; improve expandcomplex stability
Browse files Browse the repository at this point in the history
  • Loading branch information
Woosub-Kim committed Dec 15, 2023
1 parent 258be0f commit 799d42c
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 37 deletions.
30 changes: 5 additions & 25 deletions data/easycomplexsearch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,30 +26,10 @@ if notExists "${TARGET}.dbtype"; then
TARGET="${TMP_PATH}/target"
fi

if notExists "${TMP_PATH}/result.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" search "${QUERY}" "${TARGET}" "${TMP_PATH}/result" "${TMP_PATH}/search_tmp" ${SEARCH_PAR} \
|| fail "Search died"
fi

RESULT="${TMP_PATH}/result"
if [ "$PREFMODE" != "EXHAUSTIVE" ]; then
if notExists "${TMP_PATH}/result_expand_pref.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" expandcomplex "${QUERY}" "${TARGET}" "${RESULT}" "${TMP_PATH}/result_expand_pref" ${THREADS_PAR} \
|| fail "Expandcomplex died"
fi
if notExists "${TMP_PATH}/result_expand_aligned.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" $COMPLEX_ALIGNMENT_ALGO "${QUERY}" "${TARGET}" "${TMP_PATH}/result_expand_pref" "${TMP_PATH}/result_expand_aligned" ${COMPLEX_ALIGN_PAR} \
|| fail "something died"
fi
RESULT="${TMP_PATH}/result_expand_aligned"
fi
if notExists "${TMP_PATH}/complex_result.dbtype"; then
# shellcheck disable=SC2086
$MMSEQS scorecomplex "${QUERY}" "${TARGET}" "${RESULT}" "${TMP_PATH}/complex_result" ${SCORECOMPLEX_PAR} \
|| fail "ScoreComplex died"
# Pass the complexsearch workflow options: EasyComplexSearch.cpp registers them as
# COMPLEXSEARCH_PAR and no longer sets SCORECOMPLEX_PAR, which would expand to nothing here.
# The variable is left unquoted on purpose so it word-splits into separate arguments.
"$MMSEQS" complexsearch "${QUERY}" "${TARGET}" "${TMP_PATH}/complex_result" "${TMP_PATH}/complexsearch_tmp" ${COMPLEXSEARCH_PAR} \
|| fail "ComplexSearch died"
fi

# shellcheck disable=SC2086
Expand Down Expand Up @@ -91,6 +71,6 @@ if [ -n "${REMOVE_TMP}" ]; then
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/query_ss" ${VERBOSITY}
fi
rm -rf "${TMP_PATH}/search_tmp"
rm -f "${TMP_PATH}/easyscorecomplex.sh"
fi
rm -rf "${TMP_PATH}/complexsearch_tmp"
rm -f "${TMP_PATH}/easycomplexsearch.sh"
fi
2 changes: 1 addition & 1 deletion src/FoldseekBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ std::vector<Command> foldseekCommands = {
"# Skip prefilter and perform an exhaustive alignment (slower but more sensitive)\n"
"foldseek complexsearch queryDB targetDB result tmp --exhaustive-search 1\n\n",
"Woosub Kim <[email protected]>",
"<i:queryDB> <i:targetDB> <o:outputFileName> <tmpDir>",
"<i:queryDB> <i:targetDB> <o:alignmentDB> <tmpDir>",
CITATION_FOLDSEEK, {
{"queryDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA | DbType::NEED_HEADER, &DbValidator::sequenceDb},
{"targetDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA | DbType::NEED_HEADER, &DbValidator::sequenceDb},
Expand Down
19 changes: 11 additions & 8 deletions src/strucclustutils/expandcomplex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "LocalParameters.h"
#include "MemoryMapped.h"
#include "createcomplexreport.h"

#include <set>
#ifdef OPENMP
#include <omp.h>
#endif
Expand Down Expand Up @@ -53,7 +53,7 @@ int expandcomplex(int argc, const char **argv, const Command &command) {
thread_idx = static_cast<unsigned int>(omp_get_thread_num());
#endif
resultToWrite_t result;
std::vector<unsigned int> dbFoundIndices;
std::set<unsigned int> dbFoundIndices;
std::vector<ChainKeyPair_t> chainKeyPairs;
#pragma omp for schedule(dynamic, 1)
// for each q complex
Expand All @@ -72,20 +72,23 @@ int expandcomplex(int argc, const char **argv, const Command &command) {
const auto dbChainKey = (unsigned int) strtoul(dbKeyBuffer, NULL, 10);
const unsigned int dbComplexId = dbChainKeyToComplexIdMap.at(dbChainKey);
// find all db complex aligned to the query complex.
if (std::find(dbFoundIndices.begin(), dbFoundIndices.end(), dbComplexId) == dbFoundIndices.end())
dbFoundIndices.emplace_back(dbComplexId);
dbFoundIndices.insert(dbComplexId);
data = Util::skipLine(data);
}
}
if (dbFoundIndices.empty())
if (dbFoundIndices.empty()) {
for (size_t qChainIdx=0; qChainIdx<qChainKeys.size(); qChainIdx++) {
resultWriter.writeData(result.c_str(),result.length(),qChainKeys[qChainIdx],thread_idx);
}
continue;
}
// Among all db complexes aligned to query complex
for (size_t dbIdx=0; dbIdx<dbFoundIndices.size(); dbIdx++) {
std::vector<unsigned int> &dbChainKeys = dbComplexIdToChainKeysMap.at(dbFoundIndices[dbIdx]);
for (auto dbIter = dbFoundIndices.begin(); dbIter != dbFoundIndices.end(); dbIter++) {
std::vector<unsigned int> &dbChainKeys = dbComplexIdToChainKeysMap.at(*dbIter);
// for all query chains
for (size_t qChainIdx=0; qChainIdx<qChainKeys.size(); qChainIdx++) {
// and target chains
for (size_t dbChainIdx=0; dbChainIdx<dbChainKeys.size(); dbChainIdx++) {
for (size_t dbChainIdx = 0; dbChainIdx < dbChainKeys.size(); dbChainIdx++) {
// get all possible alignments
chainKeyPairs.emplace_back(qChainKeys[qChainIdx], dbChainKeys[dbChainIdx]);
}
Expand Down
5 changes: 2 additions & 3 deletions src/workflow/EasyComplexSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,7 @@ int easycomplexsearch(int argc, const char **argv, const Command &command) {
cmd.addVariable("LEAVE_INPUT", par.dbOut ? "TRUE" : NULL);
par.filenames.pop_back();
cmd.addVariable("CREATEDB_PAR", par.createParameterString(par.structurecreatedb).c_str());
cmd.addVariable("SEARCH_PAR", par.createParameterString(par.structuresearchworkflow, true).c_str());
cmd.addVariable("SCORECOMPLEX_PAR", par.createParameterString(par.scorecomplex).c_str());
cmd.addVariable("COMPLEXSEARCH_PAR", par.createParameterString(par.complexsearchworkflow).c_str());
cmd.addVariable("CONVERT_PAR", par.createParameterString(par.convertalignments).c_str());
cmd.addVariable("REPORT_PAR", par.createParameterString(par.createcomplexreport).c_str());
cmd.addVariable("THREADS_PAR", par.createParameterString(par.onlythreads).c_str());
Expand All @@ -131,4 +130,4 @@ int easycomplexsearch(int argc, const char **argv, const Command &command) {
// Should never get here
assert(false);
return EXIT_FAILURE;
}
}

0 comments on commit 799d42c

Please sign in to comment.