Skip to content

Commit

Permalink
expandcomplex should now work correctly with both cluster and non-clu…
Browse files Browse the repository at this point in the history
…ster dbs
  • Loading branch information
milot-mirdita committed Dec 27, 2023
1 parent e396ca4 commit 035edc1
Showing 1 changed file with 31 additions and 9 deletions.
40 changes: 31 additions & 9 deletions src/strucclustutils/expandcomplex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,43 @@ bool compareChainKeyPair_t(const ChainKeyPair_t &first, const ChainKeyPair_t &se
int expandcomplex(int argc, const char **argv, const Command &command) {
LocalParameters &par = LocalParameters::getLocalInstance();
par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_ALIGN);
std::string qLookupFile = par.db1 + ".lookup";
std::string dbLookupFile = par.db2 + ".lookup";

DBReader<unsigned int> alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA);
alnDbr.open(DBReader<unsigned int>::LINEAR_ACCCESS);
int dbType = Parameters::DBTYPE_PREFILTER_RES;
dbType = DBReader<unsigned int>::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC);

int dbType = Parameters::DBTYPE_CLUSTER_RES;
uint16_t extended = DBReader<unsigned int>::getExtendedDbtype(alnDbr.getDbtype());
bool needSrc = false;
if (extended & Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC) {
needSrc = true;
dbType = DBReader<unsigned int>::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC);
}
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, dbType);
resultWriter.open();

const bool touch = par.preloadMode != Parameters::PRELOAD_MODE_MMAP;
IndexReader tDbr(
par.db2,
par.threads,
needSrc ? IndexReader::SRC_SEQUENCES : IndexReader::SEQUENCES,
touch ? IndexReader::PRELOAD_INDEX : 0,
DBReader<unsigned int>::USE_INDEX
);

std::vector<unsigned int> qComplexIndices;
std::vector<unsigned int> dbComplexIndices;
chainKeyToComplexId_t qChainKeyToComplexIdMap;
chainKeyToComplexId_t dbChainKeyToComplexIdMap;
complexIdToChainKeys_t qComplexIdToChainKeysMap;
complexIdToChainKeys_t dbComplexIdToChainKeysMap;
std::string qLookupFile = par.db1 + ".lookup";
std::string dbLookupFile = par.db2 + ".lookup";
getKeyToIdMapIdToKeysMapIdVec(qLookupFile, qChainKeyToComplexIdMap, qComplexIdToChainKeysMap, qComplexIndices);
getKeyToIdMapIdToKeysMapIdVec(dbLookupFile, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, dbComplexIndices);
dbComplexIndices.clear();
qChainKeyToComplexIdMap.clear();
Debug::Progress progress(qComplexIndices.size());

Debug::Progress progress(qComplexIndices.size());
#pragma omp parallel
{
unsigned int thread_idx = 0;
Expand All @@ -65,8 +82,9 @@ int expandcomplex(int argc, const char **argv, const Command &command) {
// For the current query complex
for (size_t qChainIdx=0; qChainIdx<qChainKeys.size(); qChainIdx++) {
unsigned int qKey = alnDbr.getId(qChainKeys[qChainIdx]);
if (qKey == NOT_AVAILABLE_CHAIN_KEY)
if (qKey == NOT_AVAILABLE_CHAIN_KEY) {
continue;
}
char *data = alnDbr.getData(qKey, thread_idx);
while (*data != '\0') {
char dbKeyBuffer[255 + 1];
Expand All @@ -85,14 +103,18 @@ int expandcomplex(int argc, const char **argv, const Command &command) {
continue;
}
// Among all db complexes aligned to query complex
for (auto dbIter = dbFoundIndices.begin(); dbIter != dbFoundIndices.end(); dbIter++) {
std::vector<unsigned int> &dbChainKeys = dbComplexIdToChainKeysMap.at(*dbIter);
for (auto dbIter = dbFoundIndices.cbegin(); dbIter != dbFoundIndices.cend(); ++dbIter) {
const std::vector<unsigned int> &dbChainKeys = dbComplexIdToChainKeysMap.at(*dbIter);
// for all query chains
for (size_t qChainIdx=0; qChainIdx<qChainKeys.size(); qChainIdx++) {
// and target chains
for (size_t dbChainIdx = 0; dbChainIdx < dbChainKeys.size(); dbChainIdx++) {
// get all possible alignments
chainKeyPairs.emplace_back(qChainKeys[qChainIdx], dbChainKeys[dbChainIdx]);
unsigned int currentDbKey = dbChainKeys[dbChainIdx];
if (tDbr.sequenceReader->getId(currentDbKey) == UINT_MAX) {
continue;
}
chainKeyPairs.emplace_back(qChainKeys[qChainIdx], currentDbKey);
}
}
}
Expand Down

0 comments on commit 035edc1

Please sign in to comment.