From c97a0f556c28a6a6b5290d3a48528abe97e23cbc Mon Sep 17 00:00:00 2001 From: Martin Steinegger Date: Tue, 22 Aug 2023 17:05:24 +0900 Subject: [PATCH] Rewrite createcomplexreport --- src/strucclustutils/createcomplexreport.cpp | 27 +++++++++++++------- src/strucclustutils/createcomplexreport.h | 12 ++++----- src/strucclustutils/structureconvertalis.cpp | 11 ++++---- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/strucclustutils/createcomplexreport.cpp b/src/strucclustutils/createcomplexreport.cpp index 9c32e069..03a7509b 100644 --- a/src/strucclustutils/createcomplexreport.cpp +++ b/src/strucclustutils/createcomplexreport.cpp @@ -103,10 +103,8 @@ int createcomplexreport(int argc, const char **argv, const Command &command) { const bool isDb = par.dbOut; TranslateNucl translateNucl(static_cast(par.translationTable)); Debug::Progress progress(alnDbr.getSize()); - std::vector complexResVec; Matcher::result_t res; - auto complexDataHandler = ComplexDataHandler(); - std::map complexAlignmentsWithAssId; + std::map allAlignmentsWithAssId; #pragma omp parallel num_threads(localThreads) { @@ -114,6 +112,9 @@ int createcomplexreport(int argc, const char **argv, const Command &command) { #ifdef OPENMP thread_idx = static_cast(omp_get_thread_num()); #endif + + std::map complexAlignmentsWithAssId; + #pragma omp for schedule(dynamic, 10) for (size_t i = 0; i < alnDbr.getSize(); i++) { progress.updateProgress(); @@ -125,29 +126,37 @@ int createcomplexreport(int argc, const char **argv, const Command &command) { getComplexNameChainName(queryId, qCompAndChainName); char *data = alnDbr.getData(i, thread_idx); while (*data != '\0') { - bool isValid = parseScoreComplexResult(data, res, complexDataHandler); - if (!isValid) { - std::cout << "error message"; + std::pair retComplex = parseScoreComplexResult(data, res); + if (retComplex.first == false){ + Debug(Debug::ERROR) << "No scorecomplex result provided"; + EXIT(EXIT_FAILURE); } data = Util::skipLine(data); size_t tHeaderId = tDbrHeader->sequenceReader->getId(res.dbKey); const char *tHeader = tDbrHeader->sequenceReader->getData(tHeaderId, thread_idx); std::string targetId = Util::parseFastaHeader(tHeader); - unsigned int assId = complexDataHandler.assId; + unsigned int assId = retComplex.second.assId; auto key = ComplexAlignmentKey_t(assId, qCompAndChainName.first); if (complexAlignmentsWithAssId.find(key) == complexAlignmentsWithAssId.end()){ - complexAlignmentsWithAssId.insert({key, ComplexAlignment(queryId, targetId, complexDataHandler.qTmScore, complexDataHandler.tTmScore)}); + complexAlignmentsWithAssId.insert({key, ComplexAlignment(queryId, targetId, retComplex.second.qTmScore, retComplex.second.tTmScore)}); } else { complexAlignmentsWithAssId[key].qChainVector.emplace_back(queryId); complexAlignmentsWithAssId[key].tChainVector.emplace_back(targetId); } } // while end } // for end +#pragma omp critical + { + allAlignmentsWithAssId.insert(complexAlignmentsWithAssId.begin(), complexAlignmentsWithAssId.end()); + } } std::map::iterator iter; - for (iter = complexAlignmentsWithAssId.begin(); iter != complexAlignmentsWithAssId.end(); iter++) { + std::vector complexResVec; + + for (iter = allAlignmentsWithAssId.begin(); iter != allAlignmentsWithAssId.end(); iter++) { getResult(iter->second.qChainVector, iter->second.tChainVector, complexResVec, iter->second.qTMScore, iter->second.tTMScore, iter->first.first); } + SORT_SERIAL(complexResVec.begin(), complexResVec.end(), compareComplexResult); for (size_t i=0; i < complexResVec.size(); i++) { resultWriter.writeData(complexResVec[i].result.c_str(), complexResVec[i].result.length(), 0, localThreads - 1, false, false); diff --git a/src/strucclustutils/createcomplexreport.h b/src/strucclustutils/createcomplexreport.h index 9c32805d..c0094da3 100644 --- a/src/strucclustutils/createcomplexreport.h +++ b/src/strucclustutils/createcomplexreport.h @@ -25,7 +25,7 @@ static bool compareComplexResult(const ComplexResult &first, const ComplexResult } struct ComplexDataHandler { - ComplexDataHandler() {} + ComplexDataHandler(): assId(UINT_MAX), qTmScore(0.0f), tTmScore(0.0f) {} ComplexDataHandler(unsigned int assId, double qTmScore, double tTmScore, std::string t, std::string u) : assId(assId), qTmScore(qTmScore), tTmScore(tTmScore), t(t), u(u) {} unsigned int assId; @@ -35,11 +35,12 @@ struct ComplexDataHandler { std::string u; }; -static bool parseScoreComplexResult(const char *data, Matcher::result_t &res, ComplexDataHandler &complexDataHandler) { +static std::pair parseScoreComplexResult(const char *data, Matcher::result_t &res) { const char *entry[255]; size_t columns = Util::getWordsOfLine(data, entry, 255); - if (columns!=16) - return false; + if (columns!=16) { + return std::make_pair(false, ComplexDataHandler()); + } char key[255]; ptrdiff_t keySize = (entry[1] - data); strncpy(key, data, keySize); @@ -66,8 +67,7 @@ static bool parseScoreComplexResult(const char *data, Matcher::result_t &res, Co std::string u = std::string(entry[14], entry[15] - entry[14]-1); unsigned int assId = Util::fast_atoi(entry[15]); res = Matcher::result_t(dbKey, score, qCov, dbCov, seqId, eval, alnLength, qStartPos, qEndPos, qLen, dbStartPos, dbEndPos, dbLen, -1, -1, -1, -1, backtrace); - complexDataHandler = ComplexDataHandler(assId, qTmScore, tTmScore, t, u); - return true; + return std::make_pair(true, ComplexDataHandler(assId, qTmScore, tTmScore, t, u)); } #endif //FOLDSEEK_CREATECOMPLEXREPORT_H diff --git a/src/strucclustutils/structureconvertalis.cpp b/src/strucclustutils/structureconvertalis.cpp index eb290ade..48146d9e 100644 --- a/src/strucclustutils/structureconvertalis.cpp +++ b/src/strucclustutils/structureconvertalis.cpp @@ -548,12 +548,11 @@ int structureconvertalis(int argc, const char **argv, const Command &command) { } char *data = alnDbr.getData(i, thread_idx); Matcher::result_t res; - auto complexDataHandler = ComplexDataHandler(); while (*data != '\0') { const char *entry[255]; Util::getWordsOfLine(data, entry, 255); - isScoreComplexDB = parseScoreComplexResult(data, res, complexDataHandler); - if (!isScoreComplexDB) { + std::pair retComplex = parseScoreComplexResult(data, res); + if (retComplex.first == false) { res = Matcher::parseAlignmentRecord(data, true); } data = Util::skipLine(data); @@ -883,7 +882,7 @@ int structureconvertalis(int argc, const char **argv, const Command &command) { Debug(Debug::ERROR) << "The column qcomplextmscore is only for scorecomplex result.\n"; EXIT(EXIT_FAILURE); } - result.append(SSTR(complexDataHandler.qTmScore)); + result.append(SSTR(retComplex.second.qTmScore)); break; case LocalParameters::OUTFMT_T_COMPLEX_TMSCORE: if (!isScoreComplexDB) { @@ -891,7 +890,7 @@ int structureconvertalis(int argc, const char **argv, const Command &command) { Debug(Debug::ERROR) << "The column tcomplextmscore is only for scorecomplex result.\n"; EXIT(EXIT_FAILURE); } - result.append(SSTR(complexDataHandler.tTmScore)); + result.append(SSTR(retComplex.second.tTmScore)); break; case LocalParameters::OUTFMT_ASSIGN_ID: if (!isScoreComplexDB) { @@ -899,7 +898,7 @@ int structureconvertalis(int argc, const char **argv, const Command &command) { Debug(Debug::ERROR) << "The column assignid is only for scorecomplex result.\n"; EXIT(EXIT_FAILURE); } - result.append(SSTR(complexDataHandler.assId)); + result.append(SSTR(retComplex.second.assId)); break; } if (i < outcodes.size() - 1) {