Skip to content

Commit

Permalink
update expandcomplex
Browse files Browse the repository at this point in the history
  • Loading branch information
Woosub-Kim committed Dec 13, 2023
1 parent 10ba8f5 commit 76ffa03
Showing 1 changed file with 39 additions and 63 deletions.
102 changes: 39 additions & 63 deletions src/strucclustutils/expandcomplex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,80 +10,61 @@
#ifdef OPENMP
#include <omp.h>
#endif
typedef std::pair<unsigned int, unsigned int> Chain_t; // complexId, chainKey
typedef std::pair<Chain_t, Chain_t> ChainAln_t; // queryChain, dbChain
typedef std::vector<ChainAln_t> SearchResult_t; // dbComplexId, vector of ChainAln_t

bool compareChainAln_tByDbComplexId(const ChainAln_t &first, const ChainAln_t &second) {
if (first.second.first < second.second.first)
return true;
if (first.second.first > second.second.first)
return false;
return false;
}
typedef std::pair<unsigned int, unsigned int> ChainKeyPair_t; // queryChain, dbChain

bool compareChainAln_tByQueryChainKeyDbChainKey(const ChainAln_t &first, const ChainAln_t &second) {
if (first.first.second < second.first.second)
bool compareChainKeyPair_t(const ChainKeyPair_t &first, const ChainKeyPair_t &second) {
if (first.first < second.first)
return true;
if (first.first.second > second.first.second)
if (first.first > second.first)
return false;
if (first.second.second < second.second.second)
if (first.second < second.second)
return true;
if (first.second.second > second.second.second)
if (first.second > second.second)
return false;
return false;
}

class ComplexExpander {
public:
ComplexExpander(DBReader<unsigned int> &alnDbr, chainKeyToComplexId_t &dbChainKeyToComplexIdLookup, unsigned int thread_idx)
: alnDbr(alnDbr), thread_idx(thread_idx), dbChainKeyToComplexIdLookup(dbChainKeyToComplexIdLookup){}
ComplexExpander(DBReader<unsigned int> &alnDbr, chainKeyToComplexId_t &dbChainKeyToComplexIdMap, complexIdToChainKeys_t &dbComplexIdToChainKeysMap, unsigned int thread_idx)
: alnDbr(alnDbr), thread_idx(thread_idx), dbChainKeyToComplexIdMap(dbChainKeyToComplexIdMap), dbComplexIdToChainKeysMap(dbComplexIdToChainKeysMap) {}

void getSearchResults(unsigned int qComplexId, std::vector<unsigned int> &qChainKeys, std::vector<SearchResult_t> &searchResults) {
void getQueryDbKeyPairs(std::vector<unsigned int> &qChainKeys, std::vector<ChainKeyPair_t> & queryDbKeyPairs) {
for (auto qChainKey: qChainKeys) {
unsigned int qKey = alnDbr.getId(qChainKey);
if (qKey == NOT_AVAILABLE_CHAIN_KEY)
continue;
char *data = alnDbr.getData(qKey, thread_idx);
qChain = Chain_t(qComplexId, qChainKey);
while (*data != '\0') {
char dbKeyBuffer[255 + 1];
Util::parseKey(data, dbKeyBuffer);
const auto dbChainKey = (unsigned int) strtoul(dbKeyBuffer, NULL, 10);
const unsigned int dbComplexId = dbChainKeyToComplexIdLookup.at(dbChainKey);
currAln = ChainAln_t(qChain, Chain_t(dbComplexId, dbChainKey));
currAlns.emplace_back(currAln);
const unsigned int dbComplexId = dbChainKeyToComplexIdMap.at(dbChainKey);
if (std::find(dbFoundComplexIndexes.begin(), dbFoundComplexIndexes.end(), dbComplexId) == dbFoundComplexIndexes.end())
dbFoundComplexIndexes.emplace_back(dbComplexId);
data = Util::skipLine(data);
}
}
if (currAlns.empty())
if (dbFoundComplexIndexes.empty())
return;
SORT_SERIAL(currAlns.begin(), currAlns.end(), compareChainAln_tByDbComplexId);
unsigned int currDbComplexId = currAlns[0].second.first;
searchResult = {};
for (auto &aln: currAlns) {
if (aln.second.first != currDbComplexId) {
if (!searchResult.empty())
searchResults.emplace_back(searchResult);
currDbComplexId = aln.second.first;
searchResult = {};
for (auto dbComplexId: dbFoundComplexIndexes) {
auto &dbChainKeys = dbComplexIdToChainKeysMap.at(dbComplexId);
for (auto qChainKey: qChainKeys) {
for (auto dbChainKey: dbChainKeys) {
queryDbKeyPairs.emplace_back(qChainKey, dbChainKey);
}
}
searchResult.emplace_back(aln);
}
if (!searchResult.empty())
searchResults.emplace_back(searchResult);
currAlns.clear();
searchResult.clear();
dbFoundComplexIndexes.clear();
}

private:
DBReader<unsigned int> &alnDbr;
unsigned int thread_idx;
Chain_t qChain;
ChainAln_t currAln;
std::vector<ChainAln_t> currAlns;
SearchResult_t searchResult;
chainKeyToComplexId_t &dbChainKeyToComplexIdLookup;
std::vector<unsigned int> dbFoundComplexIndexes;
chainKeyToComplexId_t &dbChainKeyToComplexIdMap;
complexIdToChainKeys_t &dbComplexIdToChainKeysMap;
};

int expandcomplex(int argc, const char **argv, const Command &command) {
Expand All @@ -93,10 +74,12 @@ int expandcomplex(int argc, const char **argv, const Command &command) {
std::string dbLookupFile = par.db2 + ".lookup";
DBReader<unsigned int> alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA);
alnDbr.open(DBReader<unsigned int>::LINEAR_ACCCESS);
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, Parameters::DBTYPE_ALIGNMENT_RES);
// TEMP
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, Parameters::DBTYPE_PREFILTER_RES);
resultWriter.open();
std::vector<unsigned int> qComplexIndices;
std::vector<unsigned int> dbComplexIndices;
std::vector<ChainKeyPair_t> chainKeyPairs;
chainKeyToComplexId_t qChainKeyToComplexIdMap;
chainKeyToComplexId_t dbChainKeyToComplexIdMap;
complexIdToChainKeys_t dbComplexIdToChainKeysMap;
Expand All @@ -105,7 +88,6 @@ int expandcomplex(int argc, const char **argv, const Command &command) {
getKeyToIdMapIdToKeysMapIdVec(dbLookupFile, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, dbComplexIndices);
dbComplexIndices.clear();
qChainKeyToComplexIdMap.clear();
dbComplexIdToChainKeysMap.clear();
Debug::Progress progress(qComplexIndices.size());

#pragma omp parallel
Expand All @@ -114,39 +96,33 @@ int expandcomplex(int argc, const char **argv, const Command &command) {
#ifdef OPENMP
thread_idx = static_cast<unsigned int>(omp_get_thread_num());
#endif
std::vector<SearchResult_t> searchResults;
resultToWrite_t result;
ComplexExpander complexExpander(alnDbr, dbChainKeyToComplexIdMap, thread_idx);
ComplexExpander complexExpander(alnDbr, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, thread_idx);
#pragma omp for schedule(dynamic, 1)
// for each q complex
for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) {
unsigned int qComplexId = qComplexIndices[qCompIdx];
std::vector<unsigned int> &qChainKeys = qComplexIdToChainKeysMap.at(qComplexId);
complexExpander.getSearchResults(qComplexId, qChainKeys, searchResults);
for (unsigned int searchResultIdx = 0; searchResultIdx < searchResults.size(); searchResultIdx++) {
auto &currentSearchResult = searchResults[searchResultIdx];
SORT_SERIAL(currentSearchResult.begin(), currentSearchResult.end(),compareChainAln_tByQueryChainKeyDbChainKey);
unsigned int qPrevChainKey = currentSearchResult[0].first.second;
for (size_t alnIdx=0; alnIdx<currentSearchResult.size(); alnIdx++) {
auto &aln = currentSearchResult[alnIdx];
if (aln.first.second != qPrevChainKey) {
resultWriter.writeData(result.c_str(),result.length(),qPrevChainKey,thread_idx);
result.clear();
qPrevChainKey = aln.first.second;
}
result.append(SSTR(aln.second.second));
result.push_back('\n');
complexExpander.getQueryDbKeyPairs(qChainKeys, chainKeyPairs);
SORT_SERIAL(chainKeyPairs.begin(), chainKeyPairs.end(), compareChainKeyPair_t);
unsigned int qPrevChainKey = chainKeyPairs[0].first;
for (size_t chainKeyPairIdx=0; chainKeyPairIdx<chainKeyPairs.size(); chainKeyPairIdx++) {
unsigned int qCurrChainKey = chainKeyPairs[chainKeyPairIdx].first;
if (qCurrChainKey != qPrevChainKey) {
resultWriter.writeData(result.c_str(),result.length(),qPrevChainKey,thread_idx);
result.clear();
qPrevChainKey = chainKeyPairs[chainKeyPairIdx].first;
}
resultWriter.writeData(result.c_str(),result.length(),qPrevChainKey,thread_idx);
result.clear();
result.append(SSTR(chainKeyPairs[chainKeyPairIdx].second));
result.push_back('\n');
}
searchResults.clear();
progress.updateProgress();
}
}
qComplexIndices.clear();
dbChainKeyToComplexIdMap.clear();
qComplexIdToChainKeysMap.clear();
dbComplexIdToChainKeysMap.clear();
alnDbr.close();
resultWriter.close(true);
return EXIT_SUCCESS;
Expand Down

0 comments on commit 76ffa03

Please sign in to comment.