Skip to content

Commit

Permalink
Carry extended dbtype for complexsearch to work with clustered dbs
Browse files: browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed Dec 26, 2023
1 parent f05703d commit e396ca4
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 22 deletions.
1 change: 1 addition & 0 deletions src/strucclustutils/createcomplexreport.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef FOLDSEEK_CREATECOMPLEXREPORT_H
#define FOLDSEEK_CREATECOMPLEXREPORT_H
#include "Matcher.h"
#include "MemoryMapped.h"

// Sentinel chain key. 4294967295 equals 2^32 - 1, i.e. the maximum of a
// 32-bit unsigned integer.
// NOTE(review): presumably marks a chain for which no key is available; the
// uses of this constant are not visible in this excerpt -- confirm.
const unsigned int NOT_AVAILABLE_CHAIN_KEY = 4294967295;
// Upper bound for the assigned-chain ratio. scorecomplex() caps
// par.minAssignedChainsThreshold at this value before using it.
const double MAX_ASSIGNED_CHAIN_RATIO = 1.0;
Expand Down
4 changes: 3 additions & 1 deletion src/strucclustutils/expandcomplex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ int expandcomplex(int argc, const char **argv, const Command &command) {
std::string dbLookupFile = par.db2 + ".lookup";
DBReader<unsigned int> alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA);
alnDbr.open(DBReader<unsigned int>::LINEAR_ACCCESS);
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, Parameters::DBTYPE_PREFILTER_RES);
int dbType = Parameters::DBTYPE_PREFILTER_RES;
dbType = DBReader<unsigned int>::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC);
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, dbType);
resultWriter.open();
std::vector<unsigned int> qComplexIndices;
std::vector<unsigned int> dbComplexIndices;
Expand Down
78 changes: 57 additions & 21 deletions src/strucclustutils/scorecomplex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
#include "Util.h"
#include "LocalParameters.h"
#include "Matcher.h"
#include "structureto3diseqdist.h"
#include "StructureUtil.h"
#include "TMaligner.h"
#include "Coordinate16.h"
#include "MemoryMapped.h"
#include "createcomplexreport.h"

#ifdef OPENMP
Expand Down Expand Up @@ -606,28 +604,65 @@ class ComplexScorer {
int scorecomplex(int argc, const char **argv, const Command &command) {
LocalParameters &par = LocalParameters::getLocalInstance();
par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_ALIGN);

DBReader<unsigned int> alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA);
alnDbr.open(DBReader<unsigned int>::LINEAR_ACCCESS);
uint16_t extended = DBReader<unsigned int>::getExtendedDbtype(alnDbr.getDbtype());
int dbType = Parameters::DBTYPE_ALIGNMENT_RES;
bool needSrc = false;
if (extended & Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC) {
needSrc = true;
dbType = DBReader<unsigned int>::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC);
}
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, dbType);
resultWriter.open();

const bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP);
IndexReader q3DiDbr(StructureUtil::getIndexWithSuffix(par.db1, "_ss"), par.threads, IndexReader::SEQUENCES, touch ? IndexReader::PRELOAD_INDEX : 0);
IndexReader *t3DiDbr = NULL;
auto *qCaDbr = new IndexReader(par.db1, par.threads, IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1), touch ? IndexReader::PRELOAD_INDEX : 0, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA, "_ca" );
IndexReader *tCaDbr = NULL;

std::string t3DiDbrName = StructureUtil::getIndexWithSuffix(par.db2, "_ss");
bool is3DiIdx = Parameters::isEqualDbtype(FileUtil::parseDbType(t3DiDbrName.c_str()), Parameters::DBTYPE_INDEX_DB);
IndexReader t3DiDbr(
is3DiIdx ? t3DiDbrName : par.db2,
par.threads,
needSrc ? IndexReader::SRC_SEQUENCES : IndexReader::SEQUENCES,
touch ? IndexReader::PRELOAD_INDEX : 0,
DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA,
needSrc ? "_seq_ss" : "_ss"
);
IndexReader tCaDbr(
par.db2,
par.threads,
needSrc
? IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB2)
: IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1),
touch ? IndexReader::PRELOAD_INDEX : 0,
DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA,
needSrc ? "_seq_ca" : "_ca"
);
IndexReader* q3DiDbr = NULL;
IndexReader* qCaDbr = NULL;
bool sameDB = false;
if (par.db1 == par.db2) {
sameDB = true;
t3DiDbr = &q3DiDbr;
tCaDbr = qCaDbr;
q3DiDbr = &t3DiDbr;
qCaDbr = &tCaDbr;
} else {
t3DiDbr = new IndexReader(StructureUtil::getIndexWithSuffix(par.db2, "_ss"), par.threads, IndexReader::SEQUENCES, touch ? IndexReader::PRELOAD_INDEX : 0);
tCaDbr = new IndexReader(par.db2, par.threads, IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1), touch ? IndexReader::PRELOAD_INDEX : 0, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA, "_ca");
q3DiDbr = new IndexReader(
StructureUtil::getIndexWithSuffix(par.db1, "_ss"),
par.threads, IndexReader::SEQUENCES,
touch ? IndexReader::PRELOAD_INDEX : 0,
DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA
);
qCaDbr = new IndexReader(
par.db1,
par.threads,
IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1),
touch ? IndexReader::PRELOAD_INDEX : 0,
DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA,
"_ca"
);
}

std::string qLookupFile = par.db1 + ".lookup";
std::string dbLookupFile = par.db2 + ".lookup";

DBReader<unsigned int> alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA);
alnDbr.open(DBReader<unsigned int>::LINEAR_ACCCESS);
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, Parameters::DBTYPE_ALIGNMENT_RES);
resultWriter.open();
double minAssignedChainsRatio = par.minAssignedChainsThreshold > MAX_ASSIGNED_CHAIN_RATIO ? MAX_ASSIGNED_CHAIN_RATIO: par.minAssignedChainsThreshold;

std::vector<unsigned int> qComplexIndices;
Expand All @@ -636,6 +671,8 @@ int scorecomplex(int argc, const char **argv, const Command &command) {
chainKeyToComplexId_t dbChainKeyToComplexIdMap;
complexIdToChainKeys_t dbComplexIdToChainKeysMap;
complexIdToChainKeys_t qComplexIdToChainKeysMap;
std::string qLookupFile = par.db1 + ".lookup";
std::string dbLookupFile = par.db2 + ".lookup";
getKeyToIdMapIdToKeysMapIdVec(qLookupFile, qChainKeyToComplexIdMap, qComplexIdToChainKeysMap, qComplexIndices);
getKeyToIdMapIdToKeysMapIdVec(dbLookupFile, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, dbComplexIndices);
qChainKeyToComplexIdMap.clear();
Expand All @@ -652,7 +689,7 @@ int scorecomplex(int argc, const char **argv, const Command &command) {
std::vector<SearchResult> searchResults;
std::vector<Assignment> assignments;
std::vector<resultToWrite_t> resultToWriteLines;
ComplexScorer complexScorer(&q3DiDbr, t3DiDbr, alnDbr, qCaDbr, tCaDbr, thread_idx, minAssignedChainsRatio);
ComplexScorer complexScorer(q3DiDbr, &t3DiDbr, alnDbr, qCaDbr, &tCaDbr, thread_idx, minAssignedChainsRatio);
#pragma omp for schedule(dynamic, 1)
// for each q complex
for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) {
Expand Down Expand Up @@ -698,10 +735,9 @@ int scorecomplex(int argc, const char **argv, const Command &command) {
dbComplexIdToChainKeysMap.clear();
qComplexIdToChainKeysMap.clear();
alnDbr.close();
delete qCaDbr;
if (!sameDB) {
delete t3DiDbr;
delete tCaDbr;
delete q3DiDbr;
delete qCaDbr;
}
resultWriter.close(true);
return EXIT_SUCCESS;
Expand Down

0 comments on commit e396ca4

Please sign in to comment.