From c74a1a5f347d33722f438d57c6bf373cdafbd488 Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Wed, 25 Sep 2024 12:29:53 +0900 Subject: [PATCH 01/11] replace singlechain mode into monomer mode --- src/commons/LocalParameters.cpp | 6 +++--- src/commons/LocalParameters.h | 4 ++-- src/strucclustutils/MultimerUtil.h | 2 +- src/strucclustutils/scoremultimer.cpp | 30 +++++++++++++-------------- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/commons/LocalParameters.cpp b/src/commons/LocalParameters.cpp index b167ab69..e2fc4426 100644 --- a/src/commons/LocalParameters.cpp +++ b/src/commons/LocalParameters.cpp @@ -23,7 +23,7 @@ LocalParameters::LocalParameters() : PARAM_N_SAMPLE(PARAM_N_SAMPLE_ID, "--n-sample", "Sample size","pick N random sample" ,typeid(int), (void *) &nsample, "^[0-9]{1}[0-9]*$"), PARAM_COORD_STORE_MODE(PARAM_COORD_STORE_MODE_ID, "--coord-store-mode", "Coord store mode", "Coordinate storage mode: \n1: C-alpha as float\n2: C-alpha as difference (uint16_t)", typeid(int), (void *) &coordStoreMode, "^[1-2]{1}$",MMseqsParameter::COMMAND_EXPERT), PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD(PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD_ID, "--min-assigned-chains-ratio", "Minimum assigned chains percentage Threshold", "Minimum ratio of assigned chains out of all query chains > thr [0.0,1.0]", typeid(float), (void *) & minAssignedChainsThreshold, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_ALIGN), - PARAM_SINGLE_CHAIN_INCLUDE_MODE(PARAM_SINGLE_CHAIN_INCLUDE_MODE_ID, "--single-chain-include-mode", "Single Chained Assignments Inclusion Mode for Multimer", "Single Chained Assignments Inclusion 0: include single chained assignments, 1: NOT include single chained assignment", typeid(int), (void *) & singleChainIncludeMode, "^[0-1]{1}$", MMseqsParameter::COMMAND_ALIGN), + PARAM_MONOMER_INCLUDE_MODE(PARAM_MONOMER_INCLUDE_MODE_ID, "--single-monomer-mode", "Monomer inclusion Mode for MultimerSerch", "Monomer Complex Inclusion 0: include monomers, 1: NOT include monomers", typeid(int), (void *) & monomerIncludeMode, "^[0-1]{1}$", MMseqsParameter::COMMAND_ALIGN), PARAM_CLUSTER_SEARCH(PARAM_CLUSTER_SEARCH_ID, "--cluster-search", "Cluster search", "first find representative then align all cluster members", typeid(int), (void *) &clusterSearch, "^[0-1]{1}$",MMseqsParameter::COMMAND_MISC), PARAM_FILE_INCLUDE(PARAM_FILE_INCLUDE_ID, "--file-include", "File Inclusion Regex", "Include file names based on this regex", typeid(std::string), (void *) &fileInclude, "^.*$"), PARAM_FILE_EXCLUDE(PARAM_FILE_EXCLUDE_ID, "--file-exclude", "File Exclusion Regex", "Exclude file names based on this regex", typeid(std::string), (void *) &fileExclude, "^.*$"), @@ -191,7 +191,7 @@ LocalParameters::LocalParameters() : //scorecmultimer scoremultimer.push_back(&PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD); - scoremultimer.push_back(&PARAM_SINGLE_CHAIN_INCLUDE_MODE); + scoremultimer.push_back(&PARAM_MONOMER_INCLUDE_MODE); scoremultimer.push_back(&PARAM_THREADS); scoremultimer.push_back(&PARAM_V); @@ -253,7 +253,7 @@ LocalParameters::LocalParameters() : maskBfactorThreshold = 0; chainNameMode = 0; minAssignedChainsThreshold = 0.0; - singleChainIncludeMode = 0; + monomerIncludeMode = 0; writeMapping = 0; tmAlignFast = 1; exactTMscore = 0; diff --git a/src/commons/LocalParameters.h b/src/commons/LocalParameters.h index 115f696b..4d0eee25 100644 --- a/src/commons/LocalParameters.h +++ b/src/commons/LocalParameters.h @@ -130,7 +130,7 @@ class LocalParameters : public Parameters { PARAMETER(PARAM_N_SAMPLE) PARAMETER(PARAM_COORD_STORE_MODE) PARAMETER(PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD) - PARAMETER(PARAM_SINGLE_CHAIN_INCLUDE_MODE) + PARAMETER(PARAM_MONOMER_INCLUDE_MODE) PARAMETER(PARAM_CLUSTER_SEARCH) PARAMETER(PARAM_FILE_INCLUDE) PARAMETER(PARAM_FILE_EXCLUDE) @@ -162,7 +162,7 @@ class LocalParameters : public Parameters { int nsample; int coordStoreMode; float minAssignedChainsThreshold; - int singleChainIncludeMode; + int monomerIncludeMode; int clusterSearch; std::string fileInclude; std::string fileExclude; diff --git a/src/strucclustutils/MultimerUtil.h b/src/strucclustutils/MultimerUtil.h index ba539a31..dea547b5 100644 --- a/src/strucclustutils/MultimerUtil.h +++ b/src/strucclustutils/MultimerUtil.h @@ -15,7 +15,7 @@ const float LEARNING_RATE = 0.1; const float TM_SCORE_MARGIN = 0.7; const unsigned int MULTIPLE_CHAINED_COMPLEX = 2; const unsigned int SIZE_OF_SUPERPOSITION_VECTOR = 12; -const int SKIP_SINGLE_CHAIN_ASSIGNMENTS = 1; +const int SKIP_MONOMERS = 1; typedef std::pair compNameChainName_t; typedef std::map chainKeyToComplexId_t; typedef std::map> complexIdToChainKeys_t; diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index c1287fae..1e41aedd 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -31,11 +31,11 @@ struct SearchResult { dbResidueLen = residueLen; } - void standardize(int singleChainedAssignmentIncludeMode) { + void standardize(int MonomerIncludeMode) { if (dbResidueLen == 0) alnVec.clear(); - if (singleChainedAssignmentIncludeMode==SKIP_SINGLE_CHAIN_ASSIGNMENTS && dbChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) + if (MonomerIncludeMode == SKIP_MONOMERS && dbChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) alnVec.clear(); if (alnVec.empty()) @@ -182,11 +182,9 @@ bool compareNeighborWithDist(const NeighborsWithDist &first, const NeighborsWith class DBSCANCluster { public: - DBSCANCluster(SearchResult &searchResult, std::set &finalClusters, double minCov, int singleChainMode) : searchResult(searchResult), finalClusters(finalClusters) { + DBSCANCluster(SearchResult &searchResult, std::set &finalClusters, double minCov) : searchResult(searchResult), finalClusters(finalClusters) { cLabel = 0; minimumClusterSize = (unsigned int) ((double) searchResult.qChainKeys.size() * minCov); - if (singleChainMode == SKIP_SINGLE_CHAIN_ASSIGNMENTS) - minimumClusterSize = std::max(MULTIPLE_CHAINED_COMPLEX, minimumClusterSize); maximumClusterSize = std::min(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size()); maximumClusterNum = searchResult.alnVec.size() / maximumClusterSize; prevMaxClusterSize = 0; @@ -196,9 +194,9 @@ class DBSCANCluster { } bool getAlnClusters() { - // if Query or Target is a Single Chain Complex. + // if Query or Target is a Monomer Complex. if (std::min(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size()) < MULTIPLE_CHAINED_COMPLEX) - return earlyStopForSingleChainComplex(); + return earlyStopForMonomers(); // rbh filter filterAlnsByRBH(); @@ -234,7 +232,7 @@ class DBSCANCluster { std::map qBestTmScore; std::map dbBestTmScore; - bool earlyStopForSingleChainComplex() { + bool earlyStopForMonomers() { if (minimumClusterSize >= MULTIPLE_CHAINED_COMPLEX) return finishDBSCAN(); @@ -472,7 +470,7 @@ class DBSCANCluster { class ComplexScorer { public: - ComplexScorer(IndexReader *qDbr3Di, IndexReader *tDbr3Di, DBReader &alnDbr, IndexReader *qCaDbr, IndexReader *tCaDbr, unsigned int thread_idx, double minAssignedChainsRatio, int singleChainedAssignmentIncludeMode) : alnDbr(alnDbr), qCaDbr(qCaDbr), tCaDbr(tCaDbr), thread_idx(thread_idx), minAssignedChainsRatio(minAssignedChainsRatio), singleChainedAssignmentIncludeMode(singleChainedAssignmentIncludeMode) { + ComplexScorer(IndexReader *qDbr3Di, IndexReader *tDbr3Di, DBReader &alnDbr, IndexReader *qCaDbr, IndexReader *tCaDbr, unsigned int thread_idx, double minAssignedChainsRatio, int monomerIncludeMode) : alnDbr(alnDbr), qCaDbr(qCaDbr), tCaDbr(tCaDbr), thread_idx(thread_idx), minAssignedChainsRatio(minAssignedChainsRatio), monomerIncludeMode(monomerIncludeMode) { maxChainLen = std::max(qDbr3Di->sequenceReader->getMaxSeqLen()+1, tDbr3Di->sequenceReader->getMaxSeqLen()+1); q3diDbr = qDbr3Di; t3diDbr = tDbr3Di; @@ -538,7 +536,7 @@ class ComplexScorer { paredSearchResult.alnVec.emplace_back(aln); continue; } - paredSearchResult.standardize(singleChainedAssignmentIncludeMode); + paredSearchResult.standardize(monomerIncludeMode); if (!paredSearchResult.alnVec.empty()) searchResults.emplace_back(paredSearchResult); @@ -550,7 +548,7 @@ class ComplexScorer { paredSearchResult.alnVec.emplace_back(aln); } currAlns.clear(); - paredSearchResult.standardize(singleChainedAssignmentIncludeMode); + paredSearchResult.standardize(monomerIncludeMode); if (!paredSearchResult.alnVec.empty()) searchResults.emplace_back(paredSearchResult); @@ -564,7 +562,7 @@ class ComplexScorer { tmAligner = new TMaligner(maxResLen, false, true, false); } finalClusters.clear(); - DBSCANCluster dbscanCluster(searchResult, finalClusters, minAssignedChainsRatio, singleChainedAssignmentIncludeMode); + DBSCANCluster dbscanCluster(searchResult, finalClusters, minAssignedChainsRatio); if (!dbscanCluster.getAlnClusters()) { finalClusters.clear(); return; @@ -610,7 +608,7 @@ class ComplexScorer { SearchResult paredSearchResult; std::set finalClusters; bool hasBacktrace; - int singleChainedAssignmentIncludeMode; + int monomerIncludeMode; unsigned int getQueryResidueLength(std::vector &qChainKeys) { unsigned int qResidueLen = 0; @@ -704,7 +702,7 @@ int scoremultimer(int argc, const char **argv, const Command &command) { } float minAssignedChainsRatio = par.minAssignedChainsThreshold > MAX_ASSIGNED_CHAIN_RATIO ? MAX_ASSIGNED_CHAIN_RATIO: par.minAssignedChainsThreshold; - int singleChainIncludeMode = par.singleChainIncludeMode; + int monomerIncludeMode = par.monomerIncludeMode; std::vector qComplexIndices; std::vector dbComplexIndices; @@ -730,13 +728,13 @@ int scoremultimer(int argc, const char **argv, const Command &command) { std::vector searchResults; std::vector assignments; std::vector resultToWriteLines; - ComplexScorer complexScorer(q3DiDbr, &t3DiDbr, alnDbr, qCaDbr, &tCaDbr, thread_idx, minAssignedChainsRatio, singleChainIncludeMode); + ComplexScorer complexScorer(q3DiDbr, &t3DiDbr, alnDbr, qCaDbr, &tCaDbr, thread_idx, minAssignedChainsRatio, monomerIncludeMode); #pragma omp for schedule(dynamic, 1) // for each q complex for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) { unsigned int qComplexId = qComplexIndices[qCompIdx]; std::vector &qChainKeys = qComplexIdToChainKeysMap.at(qComplexId); - if (par.singleChainIncludeMode == SKIP_SINGLE_CHAIN_ASSIGNMENTS && qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) + if (par.monomerIncludeMode == SKIP_MONOMERS && qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) continue; complexScorer.getSearchResults(qComplexId, qChainKeys, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, searchResults); // for each db complex From 0b1fa42392e032a5d604eff445f9640a82dffaa3 Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Wed, 25 Sep 2024 12:45:17 +0900 Subject: [PATCH 02/11] typo --- src/commons/LocalParameters.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commons/LocalParameters.cpp b/src/commons/LocalParameters.cpp index e2fc4426..eff70a4c 100644 --- a/src/commons/LocalParameters.cpp +++ b/src/commons/LocalParameters.cpp @@ -23,7 +23,7 @@ LocalParameters::LocalParameters() : PARAM_N_SAMPLE(PARAM_N_SAMPLE_ID, "--n-sample", "Sample size","pick N random sample" ,typeid(int), (void *) &nsample, "^[0-9]{1}[0-9]*$"), PARAM_COORD_STORE_MODE(PARAM_COORD_STORE_MODE_ID, "--coord-store-mode", "Coord store mode", "Coordinate storage mode: \n1: C-alpha as float\n2: C-alpha as difference (uint16_t)", typeid(int), (void *) &coordStoreMode, "^[1-2]{1}$",MMseqsParameter::COMMAND_EXPERT), PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD(PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD_ID, "--min-assigned-chains-ratio", "Minimum assigned chains percentage Threshold", "Minimum ratio of assigned chains out of all query chains > thr [0.0,1.0]", typeid(float), (void *) & minAssignedChainsThreshold, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_ALIGN), - PARAM_MONOMER_INCLUDE_MODE(PARAM_MONOMER_INCLUDE_MODE_ID, "--single-monomer-mode", "Monomer inclusion Mode for MultimerSerch", "Monomer Complex Inclusion 0: include monomers, 1: NOT include monomers", typeid(int), (void *) & monomerIncludeMode, "^[0-1]{1}$", MMseqsParameter::COMMAND_ALIGN), + PARAM_MONOMER_INCLUDE_MODE(PARAM_MONOMER_INCLUDE_MODE_ID, "--monomer-include-mode", "Monomer inclusion Mode for MultimerSerch", "Monomer Complex Inclusion 0: include monomers, 1: NOT include monomers", typeid(int), (void *) & monomerIncludeMode, "^[0-1]{1}$", MMseqsParameter::COMMAND_ALIGN), PARAM_CLUSTER_SEARCH(PARAM_CLUSTER_SEARCH_ID, "--cluster-search", "Cluster search", "first find representative then align all cluster members", typeid(int), (void *) &clusterSearch, "^[0-1]{1}$",MMseqsParameter::COMMAND_MISC), PARAM_FILE_INCLUDE(PARAM_FILE_INCLUDE_ID, "--file-include", "File Inclusion Regex", "Include file names based on this regex", typeid(std::string), (void *) &fileInclude, "^.*$"), PARAM_FILE_EXCLUDE(PARAM_FILE_EXCLUDE_ID, "--file-exclude", "File Exclusion Regex", "Exclude file names based on this regex", typeid(std::string), (void *) &fileExclude, "^.*$"), From cd26d54c98e99e9a86dd37c3945fd0886af9d93c Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Thu, 26 Sep 2024 13:58:16 +0900 Subject: [PATCH 03/11] implement complex-tm-threshold --- src/strucclustutils/MultimerUtil.h | 1 + src/strucclustutils/scoremultimer.cpp | 16 ++++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/strucclustutils/MultimerUtil.h b/src/strucclustutils/MultimerUtil.h index dea547b5..da7df4bc 100644 --- a/src/strucclustutils/MultimerUtil.h +++ b/src/strucclustutils/MultimerUtil.h @@ -15,6 +15,7 @@ const float LEARNING_RATE = 0.1; const float TM_SCORE_MARGIN = 0.7; const unsigned int MULTIPLE_CHAINED_COMPLEX = 2; const unsigned int SIZE_OF_SUPERPOSITION_VECTOR = 12; +const float maxTmScore = 1.0; const int SKIP_MONOMERS = 1; typedef std::pair compNameChainName_t; typedef std::map chainKeyToComplexId_t; diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index 1e41aedd..d86e698a 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -470,7 +470,7 @@ class DBSCANCluster { class ComplexScorer { public: - ComplexScorer(IndexReader *qDbr3Di, IndexReader *tDbr3Di, DBReader &alnDbr, IndexReader *qCaDbr, IndexReader *tCaDbr, unsigned int thread_idx, double minAssignedChainsRatio, int monomerIncludeMode) : alnDbr(alnDbr), qCaDbr(qCaDbr), tCaDbr(tCaDbr), thread_idx(thread_idx), minAssignedChainsRatio(minAssignedChainsRatio), monomerIncludeMode(monomerIncludeMode) { + ComplexScorer(IndexReader *qDbr3Di, IndexReader *tDbr3Di, DBReader &alnDbr, IndexReader *qCaDbr, IndexReader *tCaDbr, unsigned int thread_idx, float minAssignedChainsRatio, float complexTmThr, int monomerIncludeMode) : alnDbr(alnDbr), qCaDbr(qCaDbr), tCaDbr(tCaDbr), thread_idx(thread_idx), minAssignedChainsRatio(minAssignedChainsRatio), complexTmThr(complexTmThr), monomerIncludeMode(monomerIncludeMode) { maxChainLen = std::max(qDbr3Di->sequenceReader->getMaxSeqLen()+1, tDbr3Di->sequenceReader->getMaxSeqLen()+1); q3diDbr = qDbr3Di; t3diDbr = tDbr3Di; @@ -573,8 +573,10 @@ class ComplexScorer { assignment.appendChainToChainAln(searchResult.alnVec[alnIdx]); } assignment.getTmScore(*tmAligner); - assignment.updateResultToWriteLines(); - assignments.emplace_back(assignment); + if (assignment.qTmScore >= complexTmThr) { + assignment.updateResultToWriteLines(); + assignments.emplace_back(assignment); + } assignment.reset(); } finalClusters.clear(); @@ -598,7 +600,8 @@ class ComplexScorer { Coordinate16 qCoords; Coordinate16 tCoords; unsigned int thread_idx; - double minAssignedChainsRatio; + float minAssignedChainsRatio; + float complexTmThr; unsigned int maxResLen; Chain qChain; Chain dbChain; @@ -702,6 +705,7 @@ int scoremultimer(int argc, const char **argv, const Command &command) { } float minAssignedChainsRatio = par.minAssignedChainsThreshold > MAX_ASSIGNED_CHAIN_RATIO ? MAX_ASSIGNED_CHAIN_RATIO: par.minAssignedChainsThreshold; + float complexTmThr = par.complexTMScoreThreshold > maxTmScore ? maxTmScore: par.complexTMScoreThreshold; int monomerIncludeMode = par.monomerIncludeMode; std::vector qComplexIndices; @@ -728,13 +732,13 @@ int scoremultimer(int argc, const char **argv, const Command &command) { std::vector searchResults; std::vector assignments; std::vector resultToWriteLines; - ComplexScorer complexScorer(q3DiDbr, &t3DiDbr, alnDbr, qCaDbr, &tCaDbr, thread_idx, minAssignedChainsRatio, monomerIncludeMode); + ComplexScorer complexScorer(q3DiDbr, &t3DiDbr, alnDbr, qCaDbr, &tCaDbr, thread_idx, minAssignedChainsRatio, complexTmThr, monomerIncludeMode); #pragma omp for schedule(dynamic, 1) // for each q complex for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) { unsigned int qComplexId = qComplexIndices[qCompIdx]; std::vector &qChainKeys = qComplexIdToChainKeysMap.at(qComplexId); - if (par.monomerIncludeMode == SKIP_MONOMERS && qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) + if (monomerIncludeMode == SKIP_MONOMERS && qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) continue; complexScorer.getSearchResults(qComplexId, qChainKeys, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, searchResults); // for each db complex From 1800e6a94e1b2cdf4e100f5f4538ee2521d605f8 Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Thu, 26 Sep 2024 14:55:26 +0900 Subject: [PATCH 04/11] update for single chained alignments --- src/commons/LocalParameters.cpp | 1 + src/commons/LocalParameters.h | 2 ++ src/strucclustutils/scoremultimer.cpp | 13 +++++++++++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/commons/LocalParameters.cpp b/src/commons/LocalParameters.cpp index eff70a4c..8766a154 100644 --- a/src/commons/LocalParameters.cpp +++ b/src/commons/LocalParameters.cpp @@ -23,6 +23,7 @@ LocalParameters::LocalParameters() : PARAM_N_SAMPLE(PARAM_N_SAMPLE_ID, "--n-sample", "Sample size","pick N random sample" ,typeid(int), (void *) &nsample, "^[0-9]{1}[0-9]*$"), PARAM_COORD_STORE_MODE(PARAM_COORD_STORE_MODE_ID, "--coord-store-mode", "Coord store mode", "Coordinate storage mode: \n1: C-alpha as float\n2: C-alpha as difference (uint16_t)", typeid(int), (void *) &coordStoreMode, "^[1-2]{1}$",MMseqsParameter::COMMAND_EXPERT), PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD(PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD_ID, "--min-assigned-chains-ratio", "Minimum assigned chains percentage Threshold", "Minimum ratio of assigned chains out of all query chains > thr [0.0,1.0]", typeid(float), (void *) & minAssignedChainsThreshold, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_ALIGN), + PARAM_COMPLEX_TM_THRESHOLD(PARAM_COMPLEX_TM_THRESHOLD_ID, "--complex-tmscore-threshold", "Complex TMscore threshold", "accept alignments with a q complex tmsore > thr [0.0,1.0]",typeid(float), (void *) &complexTMScoreThreshold, "^0(\\.[0-9]+)?|1(\\.0+)?$"), PARAM_MONOMER_INCLUDE_MODE(PARAM_MONOMER_INCLUDE_MODE_ID, "--monomer-include-mode", "Monomer inclusion Mode for MultimerSerch", "Monomer Complex Inclusion 0: include monomers, 1: NOT include monomers", typeid(int), (void *) & monomerIncludeMode, "^[0-1]{1}$", MMseqsParameter::COMMAND_ALIGN), PARAM_CLUSTER_SEARCH(PARAM_CLUSTER_SEARCH_ID, "--cluster-search", "Cluster search", "first find representative then align all cluster members", typeid(int), (void *) &clusterSearch, "^[0-1]{1}$",MMseqsParameter::COMMAND_MISC), PARAM_FILE_INCLUDE(PARAM_FILE_INCLUDE_ID, "--file-include", "File Inclusion Regex", "Include file names based on this regex", typeid(std::string), (void *) &fileInclude, "^.*$"), diff --git a/src/commons/LocalParameters.h b/src/commons/LocalParameters.h index 4d0eee25..f8274bee 100644 --- a/src/commons/LocalParameters.h +++ b/src/commons/LocalParameters.h @@ -130,6 +130,7 @@ class LocalParameters : public Parameters { PARAMETER(PARAM_N_SAMPLE) PARAMETER(PARAM_COORD_STORE_MODE) PARAMETER(PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD) + PARAMETER(PARAM_COMPLEX_TM_THRESHOLD) PARAMETER(PARAM_MONOMER_INCLUDE_MODE) PARAMETER(PARAM_CLUSTER_SEARCH) PARAMETER(PARAM_FILE_INCLUDE) @@ -162,6 +163,7 @@ class LocalParameters : public Parameters { int nsample; int coordStoreMode; float minAssignedChainsThreshold; + float complexTMScoreThreshold; int monomerIncludeMode; int clusterSearch; std::string fileInclude; diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index d86e698a..a9c38777 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -236,11 +236,16 @@ class DBSCANCluster { if (minimumClusterSize >= MULTIPLE_CHAINED_COMPLEX) return finishDBSCAN(); + getSingleChainedCluster(); + return finishDBSCAN(); + } + + void getSingleChainedCluster() { + finalClusters.clear(); for (unsigned int alnIdx = 0; alnIdx < searchResult.alnVec.size(); alnIdx++ ) { neighbors = {alnIdx}; finalClusters.insert(neighbors); } - return finishDBSCAN(); } bool runDBSCAN() { @@ -312,6 +317,10 @@ class DBSCANCluster { eps += learningRate; } + // + if (minimumClusterSize < MULTIPLE_CHAINED_COMPLEX && currMaxClusterSize < MULTIPLE_CHAINED_COMPLEX) + getSingleChainedCluster(); + return finishDBSCAN(); } @@ -573,7 +582,7 @@ class ComplexScorer { assignment.appendChainToChainAln(searchResult.alnVec[alnIdx]); } assignment.getTmScore(*tmAligner); - if (assignment.qTmScore >= complexTmThr) { + if (assignment.qTmScore > complexTmThr) { assignment.updateResultToWriteLines(); assignments.emplace_back(assignment); } From 704c3a82da6c0f88365957130063c0fb1ccd4651 Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Thu, 26 Sep 2024 15:37:45 +0900 Subject: [PATCH 05/11] fix chain cov ratio --- src/commons/LocalParameters.cpp | 2 ++ src/strucclustutils/scoremultimer.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/commons/LocalParameters.cpp b/src/commons/LocalParameters.cpp index 8766a154..90d9128c 100644 --- a/src/commons/LocalParameters.cpp +++ b/src/commons/LocalParameters.cpp @@ -192,6 +192,7 @@ LocalParameters::LocalParameters() : //scorecmultimer scoremultimer.push_back(&PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD); + scoremultimer.push_back(&PARAM_COMPLEX_TM_THRESHOLD); scoremultimer.push_back(&PARAM_MONOMER_INCLUDE_MODE); scoremultimer.push_back(&PARAM_THREADS); scoremultimer.push_back(&PARAM_V); @@ -254,6 +255,7 @@ LocalParameters::LocalParameters() : maskBfactorThreshold = 0; chainNameMode = 0; minAssignedChainsThreshold = 0.0; + complexTMScoreThreshold = 0.0; monomerIncludeMode = 0; writeMapping = 0; tmAlignFast = 1; diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index a9c38777..222b7db5 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -184,7 +184,7 @@ class DBSCANCluster { public: DBSCANCluster(SearchResult &searchResult, std::set &finalClusters, double minCov) : searchResult(searchResult), finalClusters(finalClusters) { cLabel = 0; - minimumClusterSize = (unsigned int) ((double) searchResult.qChainKeys.size() * minCov); + minimumClusterSize = std::ceil((float) searchResult.qChainKeys.size() * minCov); maximumClusterSize = std::min(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size()); maximumClusterNum = searchResult.alnVec.size() / maximumClusterSize; prevMaxClusterSize = 0; From 43fd26f3d3e043c8f9fd4c2b193a8b68f8781689 Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Thu, 26 Sep 2024 15:50:17 +0900 Subject: [PATCH 06/11] remove tmscore threshold --- src/commons/LocalParameters.cpp | 3 --- src/commons/LocalParameters.h | 2 -- src/strucclustutils/MultimerUtil.h | 1 - src/strucclustutils/scoremultimer.cpp | 12 ++++-------- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/commons/LocalParameters.cpp b/src/commons/LocalParameters.cpp index 90d9128c..eff70a4c 100644 --- a/src/commons/LocalParameters.cpp +++ b/src/commons/LocalParameters.cpp @@ -23,7 +23,6 @@ LocalParameters::LocalParameters() : PARAM_N_SAMPLE(PARAM_N_SAMPLE_ID, "--n-sample", "Sample size","pick N random sample" ,typeid(int), (void *) &nsample, "^[0-9]{1}[0-9]*$"), PARAM_COORD_STORE_MODE(PARAM_COORD_STORE_MODE_ID, "--coord-store-mode", "Coord store mode", "Coordinate storage mode: \n1: C-alpha as float\n2: C-alpha as difference (uint16_t)", typeid(int), (void *) &coordStoreMode, "^[1-2]{1}$",MMseqsParameter::COMMAND_EXPERT), PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD(PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD_ID, "--min-assigned-chains-ratio", "Minimum assigned chains percentage Threshold", "Minimum ratio of assigned chains out of all query chains > thr [0.0,1.0]", typeid(float), (void *) & minAssignedChainsThreshold, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_ALIGN), - PARAM_COMPLEX_TM_THRESHOLD(PARAM_COMPLEX_TM_THRESHOLD_ID, "--complex-tmscore-threshold", "Complex TMscore threshold", "accept alignments with a q complex tmsore > thr [0.0,1.0]",typeid(float), (void *) &complexTMScoreThreshold, "^0(\\.[0-9]+)?|1(\\.0+)?$"), PARAM_MONOMER_INCLUDE_MODE(PARAM_MONOMER_INCLUDE_MODE_ID, "--monomer-include-mode", "Monomer inclusion Mode for MultimerSerch", "Monomer Complex Inclusion 0: include monomers, 1: NOT include monomers", typeid(int), (void *) & monomerIncludeMode, "^[0-1]{1}$", MMseqsParameter::COMMAND_ALIGN), PARAM_CLUSTER_SEARCH(PARAM_CLUSTER_SEARCH_ID, "--cluster-search", "Cluster search", "first find representative then align all cluster members", typeid(int), (void *) &clusterSearch, "^[0-1]{1}$",MMseqsParameter::COMMAND_MISC), PARAM_FILE_INCLUDE(PARAM_FILE_INCLUDE_ID, "--file-include", "File Inclusion Regex", "Include file names based on this regex", typeid(std::string), (void *) &fileInclude, "^.*$"), @@ -192,7 +191,6 @@ LocalParameters::LocalParameters() : //scorecmultimer scoremultimer.push_back(&PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD); - scoremultimer.push_back(&PARAM_COMPLEX_TM_THRESHOLD); scoremultimer.push_back(&PARAM_MONOMER_INCLUDE_MODE); scoremultimer.push_back(&PARAM_THREADS); scoremultimer.push_back(&PARAM_V); @@ -255,7 +253,6 @@ LocalParameters::LocalParameters() : maskBfactorThreshold = 0; chainNameMode = 0; minAssignedChainsThreshold = 0.0; - complexTMScoreThreshold = 0.0; monomerIncludeMode = 0; writeMapping = 0; tmAlignFast = 1; diff --git a/src/commons/LocalParameters.h b/src/commons/LocalParameters.h index f8274bee..4d0eee25 100644 --- a/src/commons/LocalParameters.h +++ b/src/commons/LocalParameters.h @@ -130,7 +130,6 @@ class LocalParameters : public Parameters { PARAMETER(PARAM_N_SAMPLE) PARAMETER(PARAM_COORD_STORE_MODE) PARAMETER(PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD) - PARAMETER(PARAM_COMPLEX_TM_THRESHOLD) PARAMETER(PARAM_MONOMER_INCLUDE_MODE) PARAMETER(PARAM_CLUSTER_SEARCH) PARAMETER(PARAM_FILE_INCLUDE) @@ -163,7 +162,6 @@ class LocalParameters : public Parameters { int nsample; int coordStoreMode; float minAssignedChainsThreshold; - float complexTMScoreThreshold; int monomerIncludeMode; int clusterSearch; std::string fileInclude; diff --git a/src/strucclustutils/MultimerUtil.h b/src/strucclustutils/MultimerUtil.h index da7df4bc..dea547b5 100644 --- a/src/strucclustutils/MultimerUtil.h +++ b/src/strucclustutils/MultimerUtil.h @@ -15,7 +15,6 @@ const float LEARNING_RATE = 0.1; const float TM_SCORE_MARGIN = 0.7; const unsigned int MULTIPLE_CHAINED_COMPLEX = 2; const unsigned int SIZE_OF_SUPERPOSITION_VECTOR = 12; -const float maxTmScore = 1.0; const int SKIP_MONOMERS = 1; typedef std::pair compNameChainName_t; typedef std::map chainKeyToComplexId_t; diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index 222b7db5..acfa1513 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -479,7 +479,7 @@ class DBSCANCluster { class ComplexScorer { public: - ComplexScorer(IndexReader *qDbr3Di, IndexReader *tDbr3Di, DBReader &alnDbr, IndexReader *qCaDbr, IndexReader *tCaDbr, unsigned int thread_idx, float minAssignedChainsRatio, float complexTmThr, int monomerIncludeMode) : alnDbr(alnDbr), qCaDbr(qCaDbr), tCaDbr(tCaDbr), thread_idx(thread_idx), minAssignedChainsRatio(minAssignedChainsRatio), complexTmThr(complexTmThr), monomerIncludeMode(monomerIncludeMode) { + ComplexScorer(IndexReader *qDbr3Di, IndexReader *tDbr3Di, DBReader &alnDbr, IndexReader *qCaDbr, IndexReader *tCaDbr, unsigned int thread_idx, float minAssignedChainsRatio, int monomerIncludeMode) : alnDbr(alnDbr), qCaDbr(qCaDbr), tCaDbr(tCaDbr), thread_idx(thread_idx), minAssignedChainsRatio(minAssignedChainsRatio), monomerIncludeMode(monomerIncludeMode) { maxChainLen = std::max(qDbr3Di->sequenceReader->getMaxSeqLen()+1, tDbr3Di->sequenceReader->getMaxSeqLen()+1); q3diDbr = qDbr3Di; t3diDbr = tDbr3Di; @@ -582,10 +582,8 @@ class ComplexScorer { assignment.appendChainToChainAln(searchResult.alnVec[alnIdx]); } assignment.getTmScore(*tmAligner); - if (assignment.qTmScore > complexTmThr) { - assignment.updateResultToWriteLines(); - assignments.emplace_back(assignment); - } + assignment.updateResultToWriteLines(); + assignments.emplace_back(assignment); assignment.reset(); } finalClusters.clear(); @@ -610,7 +608,6 @@ class ComplexScorer { Coordinate16 tCoords; unsigned int thread_idx; float minAssignedChainsRatio; - float complexTmThr; unsigned int maxResLen; Chain qChain; Chain dbChain; @@ -714,7 +711,6 @@ int scoremultimer(int argc, const char **argv, const Command &command) { } float minAssignedChainsRatio = par.minAssignedChainsThreshold > MAX_ASSIGNED_CHAIN_RATIO ? MAX_ASSIGNED_CHAIN_RATIO: par.minAssignedChainsThreshold; - float complexTmThr = par.complexTMScoreThreshold > maxTmScore ? maxTmScore: par.complexTMScoreThreshold; int monomerIncludeMode = par.monomerIncludeMode; std::vector qComplexIndices; @@ -741,7 +737,7 @@ int scoremultimer(int argc, const char **argv, const Command &command) { std::vector searchResults; std::vector assignments; std::vector resultToWriteLines; - ComplexScorer complexScorer(q3DiDbr, &t3DiDbr, alnDbr, qCaDbr, &tCaDbr, thread_idx, minAssignedChainsRatio, complexTmThr, monomerIncludeMode); + ComplexScorer complexScorer(q3DiDbr, &t3DiDbr, alnDbr, qCaDbr, &tCaDbr, thread_idx, minAssignedChainsRatio, monomerIncludeMode); #pragma omp for schedule(dynamic, 1) // for each q complex for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) { From 83cc643bd9a403377277d62a7332c401178bec5b Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Fri, 27 Sep 2024 14:24:07 +0900 Subject: [PATCH 07/11] update single chain cluster --- src/strucclustutils/scoremultimer.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index acfa1513..1c94eb44 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -242,9 +242,15 @@ class DBSCANCluster { void getSingleChainedCluster() { finalClusters.clear(); - for (unsigned int alnIdx = 0; alnIdx < searchResult.alnVec.size(); alnIdx++ ) { - neighbors = {alnIdx}; - finalClusters.insert(neighbors); + float maxScore = FLT_MIN; + float score; + for (unsigned int alnIdx = 0; alnIdx < searchResult.alnVec.size(); alnIdx++) { + auto &aln = searchResult.alnVec[alnIdx]; + score = aln.tmScore * (float) aln.matches; + if (score < maxScore) + continue; + maxScore = score; + finalClusters = {{alnIdx}}; } } From 2dadffc05245e8d5f124662caf6ea621f18a51d6 Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Fri, 27 Sep 2024 16:13:30 +0900 Subject: [PATCH 08/11] test rbh filter off --- src/strucclustutils/scoremultimer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index 1e41aedd..2a9aabea 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -184,7 +184,7 @@ class DBSCANCluster { public: DBSCANCluster(SearchResult &searchResult, std::set &finalClusters, double minCov) : searchResult(searchResult), finalClusters(finalClusters) { cLabel = 0; - minimumClusterSize = (unsigned int) ((double) searchResult.qChainKeys.size() * minCov); + minimumClusterSize = std::ceil((float) searchResult.qChainKeys.size() * minCov); maximumClusterSize = std::min(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size()); maximumClusterNum = searchResult.alnVec.size() / maximumClusterSize; prevMaxClusterSize = 0; @@ -199,7 +199,7 @@ class DBSCANCluster { return earlyStopForMonomers(); // rbh filter - filterAlnsByRBH(); +// filterAlnsByRBH(); fillDistMatrix(); // To skip DBSCAN clustering when alignments are few enough. if (searchResult.alnVec.size() <= maximumClusterSize) From 4046f00fc94a5ed289f3cb48123990ee1f15664c Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Fri, 27 Sep 2024 16:15:08 +0900 Subject: [PATCH 09/11] test rbh filter off --- src/strucclustutils/scoremultimer.cpp | 29 +++++++-------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index ba939864..2a9aabea 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -236,22 +236,11 @@ class DBSCANCluster { if (minimumClusterSize >= MULTIPLE_CHAINED_COMPLEX) return finishDBSCAN(); - getSingleChainedCluster(); - return finishDBSCAN(); - } - - void getSingleChainedCluster() { - finalClusters.clear(); - float maxScore = FLT_MIN; - float score; - for (unsigned int alnIdx = 0; alnIdx < searchResult.alnVec.size(); alnIdx++) { - auto &aln = searchResult.alnVec[alnIdx]; - score = aln.tmScore * (float) aln.matches; - if (score < maxScore) - continue; - maxScore = score; - finalClusters = {{alnIdx}}; + for (unsigned int alnIdx = 0; alnIdx < searchResult.alnVec.size(); alnIdx++ ) { + neighbors = {alnIdx}; + finalClusters.insert(neighbors); } + return finishDBSCAN(); } bool runDBSCAN() { @@ -323,10 +312,6 @@ class DBSCANCluster { eps += learningRate; } - // - if (minimumClusterSize < MULTIPLE_CHAINED_COMPLEX && currMaxClusterSize < MULTIPLE_CHAINED_COMPLEX) - getSingleChainedCluster(); - return finishDBSCAN(); } @@ -485,7 +470,7 @@ class DBSCANCluster { class ComplexScorer { public: - ComplexScorer(IndexReader *qDbr3Di, IndexReader *tDbr3Di, DBReader &alnDbr, IndexReader *qCaDbr, IndexReader *tCaDbr, unsigned int thread_idx, float minAssignedChainsRatio, int monomerIncludeMode) : alnDbr(alnDbr), qCaDbr(qCaDbr), tCaDbr(tCaDbr), thread_idx(thread_idx), minAssignedChainsRatio(minAssignedChainsRatio), monomerIncludeMode(monomerIncludeMode) { + ComplexScorer(IndexReader *qDbr3Di, IndexReader *tDbr3Di, DBReader &alnDbr, IndexReader *qCaDbr, IndexReader *tCaDbr, unsigned int thread_idx, double minAssignedChainsRatio, int monomerIncludeMode) : alnDbr(alnDbr), qCaDbr(qCaDbr), tCaDbr(tCaDbr), thread_idx(thread_idx), minAssignedChainsRatio(minAssignedChainsRatio), monomerIncludeMode(monomerIncludeMode) { maxChainLen = std::max(qDbr3Di->sequenceReader->getMaxSeqLen()+1, tDbr3Di->sequenceReader->getMaxSeqLen()+1); q3diDbr = qDbr3Di; t3diDbr = tDbr3Di; @@ -613,7 +598,7 @@ class ComplexScorer { Coordinate16 qCoords; Coordinate16 tCoords; unsigned int thread_idx; - float minAssignedChainsRatio; + double minAssignedChainsRatio; unsigned int maxResLen; Chain qChain; Chain dbChain; @@ -749,7 +734,7 @@ int scoremultimer(int argc, const char **argv, const Command &command) { for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) { unsigned int qComplexId = qComplexIndices[qCompIdx]; std::vector &qChainKeys = qComplexIdToChainKeysMap.at(qComplexId); - if (monomerIncludeMode == SKIP_MONOMERS && qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) + if (par.monomerIncludeMode == SKIP_MONOMERS && qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) continue; complexScorer.getSearchResults(qComplexId, qChainKeys, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, searchResults); // for each db complex From 06275df32d419e5783e1f2dc32f1bf234dfb329e Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Fri, 27 Sep 2024 18:49:05 +0900 Subject: [PATCH 10/11] rollback to 43fd26f3d3e043c8f9fd4c2b193a8b68f8781689 --- src/strucclustutils/scoremultimer.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index 2a9aabea..acfa1513 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -199,7 +199,7 @@ class DBSCANCluster { return earlyStopForMonomers(); // rbh filter -// filterAlnsByRBH(); + filterAlnsByRBH(); fillDistMatrix(); // To skip DBSCAN clustering when alignments are few enough. if (searchResult.alnVec.size() <= maximumClusterSize) @@ -236,11 +236,16 @@ class DBSCANCluster { if (minimumClusterSize >= MULTIPLE_CHAINED_COMPLEX) return finishDBSCAN(); + getSingleChainedCluster(); + return finishDBSCAN(); + } + + void getSingleChainedCluster() { + finalClusters.clear(); for (unsigned int alnIdx = 0; alnIdx < searchResult.alnVec.size(); alnIdx++ ) { neighbors = {alnIdx}; finalClusters.insert(neighbors); } - return finishDBSCAN(); } bool runDBSCAN() { @@ -312,6 +317,10 @@ class DBSCANCluster { eps += learningRate; } + // + if (minimumClusterSize < MULTIPLE_CHAINED_COMPLEX && currMaxClusterSize < MULTIPLE_CHAINED_COMPLEX) + getSingleChainedCluster(); + return finishDBSCAN(); } @@ -470,7 +479,7 @@ class DBSCANCluster { class ComplexScorer { public: - ComplexScorer(IndexReader *qDbr3Di, IndexReader *tDbr3Di, DBReader &alnDbr, IndexReader *qCaDbr, IndexReader *tCaDbr, unsigned int thread_idx, double minAssignedChainsRatio, int monomerIncludeMode) : alnDbr(alnDbr), qCaDbr(qCaDbr), tCaDbr(tCaDbr), thread_idx(thread_idx), minAssignedChainsRatio(minAssignedChainsRatio), monomerIncludeMode(monomerIncludeMode) { + ComplexScorer(IndexReader *qDbr3Di, IndexReader *tDbr3Di, DBReader &alnDbr, IndexReader *qCaDbr, IndexReader *tCaDbr, unsigned int thread_idx, float minAssignedChainsRatio, int monomerIncludeMode) : alnDbr(alnDbr), qCaDbr(qCaDbr), tCaDbr(tCaDbr), thread_idx(thread_idx), minAssignedChainsRatio(minAssignedChainsRatio), monomerIncludeMode(monomerIncludeMode) { maxChainLen = std::max(qDbr3Di->sequenceReader->getMaxSeqLen()+1, tDbr3Di->sequenceReader->getMaxSeqLen()+1); q3diDbr = qDbr3Di; t3diDbr = tDbr3Di; @@ -598,7 +607,7 @@ class ComplexScorer { Coordinate16 qCoords; Coordinate16 tCoords; unsigned int thread_idx; - double minAssignedChainsRatio; + float minAssignedChainsRatio; unsigned int maxResLen; Chain qChain; Chain dbChain; @@ -734,7 +743,7 @@ int scoremultimer(int argc, const char **argv, const Command &command) { for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) { unsigned int qComplexId = qComplexIndices[qCompIdx]; std::vector &qChainKeys = qComplexIdToChainKeysMap.at(qComplexId); - if (par.monomerIncludeMode == SKIP_MONOMERS && qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) + if (monomerIncludeMode == SKIP_MONOMERS && qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) continue; complexScorer.getSearchResults(qComplexId, qChainKeys, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, searchResults); // for each db complex From 079a5a13c4b43fe04646912ae1e04e029ce396b8 Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Fri, 27 Sep 2024 19:02:37 +0900 Subject: [PATCH 11/11] monomer related update done --- src/strucclustutils/scoremultimer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index acfa1513..1cf635c5 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -182,7 +182,7 @@ bool compareNeighborWithDist(const NeighborsWithDist &first, const NeighborsWith class DBSCANCluster { public: - DBSCANCluster(SearchResult &searchResult, std::set &finalClusters, double minCov) : searchResult(searchResult), finalClusters(finalClusters) { + DBSCANCluster(SearchResult &searchResult, std::set &finalClusters, float minCov) : searchResult(searchResult), finalClusters(finalClusters) { cLabel = 0; minimumClusterSize = std::ceil((float) searchResult.qChainKeys.size() * minCov); maximumClusterSize = std::min(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size()); @@ -317,7 +317,7 @@ class DBSCANCluster { eps += learningRate; } - // + if (minimumClusterSize < MULTIPLE_CHAINED_COMPLEX && currMaxClusterSize < MULTIPLE_CHAINED_COMPLEX) getSingleChainedCluster();