Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Woosub-Kim committed Jan 2, 2024
2 parents c28e793 + 035edc1 commit 3fe1f9e
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 38 deletions.
1 change: 1 addition & 0 deletions src/strucclustutils/createcomplexreport.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef FOLDSEEK_CREATECOMPLEXREPORT_H
#define FOLDSEEK_CREATECOMPLEXREPORT_H
#include "Matcher.h"
#include "MemoryMapped.h"

const unsigned int NOT_AVAILABLE_CHAIN_KEY = 4294967295;
const double MAX_ASSIGNED_CHAIN_RATIO = 1.0;
Expand Down
40 changes: 32 additions & 8 deletions src/strucclustutils/expandcomplex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,43 @@ bool compareChainKeyPair_t(const ChainKeyPair_t &first, const ChainKeyPair_t &se
int expandcomplex(int argc, const char **argv, const Command &command) {
LocalParameters &par = LocalParameters::getLocalInstance();
par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_ALIGN);
std::string qLookupFile = par.db1 + ".lookup";
std::string dbLookupFile = par.db2 + ".lookup";

DBReader<unsigned int> alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA);
alnDbr.open(DBReader<unsigned int>::LINEAR_ACCCESS);
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, Parameters::DBTYPE_PREFILTER_RES);

int dbType = Parameters::DBTYPE_CLUSTER_RES;
uint16_t extended = DBReader<unsigned int>::getExtendedDbtype(alnDbr.getDbtype());
bool needSrc = false;
if (extended & Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC) {
needSrc = true;
dbType = DBReader<unsigned int>::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC);
}
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, dbType);
resultWriter.open();

const bool touch = par.preloadMode != Parameters::PRELOAD_MODE_MMAP;
IndexReader tDbr(
par.db2,
par.threads,
needSrc ? IndexReader::SRC_SEQUENCES : IndexReader::SEQUENCES,
touch ? IndexReader::PRELOAD_INDEX : 0,
DBReader<unsigned int>::USE_INDEX
);

std::vector<unsigned int> qComplexIndices;
std::vector<unsigned int> dbComplexIndices;
chainKeyToComplexId_t qChainKeyToComplexIdMap;
chainKeyToComplexId_t dbChainKeyToComplexIdMap;
complexIdToChainKeys_t qComplexIdToChainKeysMap;
complexIdToChainKeys_t dbComplexIdToChainKeysMap;
std::string qLookupFile = par.db1 + ".lookup";
std::string dbLookupFile = par.db2 + ".lookup";
getKeyToIdMapIdToKeysMapIdVec(qLookupFile, qChainKeyToComplexIdMap, qComplexIdToChainKeysMap, qComplexIndices);
getKeyToIdMapIdToKeysMapIdVec(dbLookupFile, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, dbComplexIndices);
dbComplexIndices.clear();
qChainKeyToComplexIdMap.clear();
Debug::Progress progress(qComplexIndices.size());

Debug::Progress progress(qComplexIndices.size());
#pragma omp parallel
{
unsigned int thread_idx = 0;
Expand All @@ -63,8 +82,9 @@ int expandcomplex(int argc, const char **argv, const Command &command) {
// For the current query complex
for (size_t qChainIdx=0; qChainIdx<qChainKeys.size(); qChainIdx++) {
unsigned int qKey = alnDbr.getId(qChainKeys[qChainIdx]);
if (qKey == NOT_AVAILABLE_CHAIN_KEY)
if (qKey == NOT_AVAILABLE_CHAIN_KEY) {
continue;
}
char *data = alnDbr.getData(qKey, thread_idx);
while (*data != '\0') {
char dbKeyBuffer[255 + 1];
Expand All @@ -83,14 +103,18 @@ int expandcomplex(int argc, const char **argv, const Command &command) {
continue;
}
// Among all db complexes aligned to query complex
for (auto dbIter = dbFoundIndices.begin(); dbIter != dbFoundIndices.end(); dbIter++) {
std::vector<unsigned int> &dbChainKeys = dbComplexIdToChainKeysMap.at(*dbIter);
for (auto dbIter = dbFoundIndices.cbegin(); dbIter != dbFoundIndices.cend(); ++dbIter) {
const std::vector<unsigned int> &dbChainKeys = dbComplexIdToChainKeysMap.at(*dbIter);
// for all query chains
for (size_t qChainIdx=0; qChainIdx<qChainKeys.size(); qChainIdx++) {
// and target chains
for (size_t dbChainIdx = 0; dbChainIdx < dbChainKeys.size(); dbChainIdx++) {
// get all possible alignments
chainKeyPairs.emplace_back(qChainKeys[qChainIdx], dbChainKeys[dbChainIdx]);
unsigned int currentDbKey = dbChainKeys[dbChainIdx];
if (tDbr.sequenceReader->getId(currentDbKey) == UINT_MAX) {
continue;
}
chainKeyPairs.emplace_back(qChainKeys[qChainIdx], currentDbKey);
}
}
}
Expand Down
78 changes: 57 additions & 21 deletions src/strucclustutils/scorecomplex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
#include "Util.h"
#include "LocalParameters.h"
#include "Matcher.h"
#include "structureto3diseqdist.h"
#include "StructureUtil.h"
#include "TMaligner.h"
#include "Coordinate16.h"
#include "MemoryMapped.h"
#include "createcomplexreport.h"

#ifdef OPENMP
Expand Down Expand Up @@ -606,28 +604,65 @@ class ComplexScorer {
int scorecomplex(int argc, const char **argv, const Command &command) {
LocalParameters &par = LocalParameters::getLocalInstance();
par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_ALIGN);

DBReader<unsigned int> alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA);
alnDbr.open(DBReader<unsigned int>::LINEAR_ACCCESS);
uint16_t extended = DBReader<unsigned int>::getExtendedDbtype(alnDbr.getDbtype());
int dbType = Parameters::DBTYPE_ALIGNMENT_RES;
bool needSrc = false;
if (extended & Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC) {
needSrc = true;
dbType = DBReader<unsigned int>::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC);
}
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, dbType);
resultWriter.open();

const bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP);
IndexReader q3DiDbr(StructureUtil::getIndexWithSuffix(par.db1, "_ss"), par.threads, IndexReader::SEQUENCES, touch ? IndexReader::PRELOAD_INDEX : 0);
IndexReader *t3DiDbr = NULL;
auto *qCaDbr = new IndexReader(par.db1, par.threads, IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1), touch ? IndexReader::PRELOAD_INDEX : 0, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA, "_ca" );
IndexReader *tCaDbr = NULL;

std::string t3DiDbrName = StructureUtil::getIndexWithSuffix(par.db2, "_ss");
bool is3DiIdx = Parameters::isEqualDbtype(FileUtil::parseDbType(t3DiDbrName.c_str()), Parameters::DBTYPE_INDEX_DB);
IndexReader t3DiDbr(
is3DiIdx ? t3DiDbrName : par.db2,
par.threads,
needSrc ? IndexReader::SRC_SEQUENCES : IndexReader::SEQUENCES,
touch ? IndexReader::PRELOAD_INDEX : 0,
DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA,
needSrc ? "_seq_ss" : "_ss"
);
IndexReader tCaDbr(
par.db2,
par.threads,
needSrc
? IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB2)
: IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1),
touch ? IndexReader::PRELOAD_INDEX : 0,
DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA,
needSrc ? "_seq_ca" : "_ca"
);
IndexReader* q3DiDbr = NULL;
IndexReader* qCaDbr = NULL;
bool sameDB = false;
if (par.db1 == par.db2) {
sameDB = true;
t3DiDbr = &q3DiDbr;
tCaDbr = qCaDbr;
q3DiDbr = &t3DiDbr;
qCaDbr = &tCaDbr;
} else {
t3DiDbr = new IndexReader(StructureUtil::getIndexWithSuffix(par.db2, "_ss"), par.threads, IndexReader::SEQUENCES, touch ? IndexReader::PRELOAD_INDEX : 0);
tCaDbr = new IndexReader(par.db2, par.threads, IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1), touch ? IndexReader::PRELOAD_INDEX : 0, DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA, "_ca");
q3DiDbr = new IndexReader(
StructureUtil::getIndexWithSuffix(par.db1, "_ss"),
par.threads, IndexReader::SEQUENCES,
touch ? IndexReader::PRELOAD_INDEX : 0,
DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA
);
qCaDbr = new IndexReader(
par.db1,
par.threads,
IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1),
touch ? IndexReader::PRELOAD_INDEX : 0,
DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA,
"_ca"
);
}

std::string qLookupFile = par.db1 + ".lookup";
std::string dbLookupFile = par.db2 + ".lookup";

DBReader<unsigned int> alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA);
alnDbr.open(DBReader<unsigned int>::LINEAR_ACCCESS);
DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast<unsigned int>(par.threads), par.compressed, Parameters::DBTYPE_ALIGNMENT_RES);
resultWriter.open();
double minAssignedChainsRatio = par.minAssignedChainsThreshold > MAX_ASSIGNED_CHAIN_RATIO ? MAX_ASSIGNED_CHAIN_RATIO: par.minAssignedChainsThreshold;

std::vector<unsigned int> qComplexIndices;
Expand All @@ -636,6 +671,8 @@ int scorecomplex(int argc, const char **argv, const Command &command) {
chainKeyToComplexId_t dbChainKeyToComplexIdMap;
complexIdToChainKeys_t dbComplexIdToChainKeysMap;
complexIdToChainKeys_t qComplexIdToChainKeysMap;
std::string qLookupFile = par.db1 + ".lookup";
std::string dbLookupFile = par.db2 + ".lookup";
getKeyToIdMapIdToKeysMapIdVec(qLookupFile, qChainKeyToComplexIdMap, qComplexIdToChainKeysMap, qComplexIndices);
getKeyToIdMapIdToKeysMapIdVec(dbLookupFile, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, dbComplexIndices);
qChainKeyToComplexIdMap.clear();
Expand All @@ -652,7 +689,7 @@ int scorecomplex(int argc, const char **argv, const Command &command) {
std::vector<SearchResult> searchResults;
std::vector<Assignment> assignments;
std::vector<resultToWrite_t> resultToWriteLines;
ComplexScorer complexScorer(&q3DiDbr, t3DiDbr, alnDbr, qCaDbr, tCaDbr, thread_idx, minAssignedChainsRatio);
ComplexScorer complexScorer(q3DiDbr, &t3DiDbr, alnDbr, qCaDbr, &tCaDbr, thread_idx, minAssignedChainsRatio);
#pragma omp for schedule(dynamic, 1)
// for each q complex
for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) {
Expand Down Expand Up @@ -698,10 +735,9 @@ int scorecomplex(int argc, const char **argv, const Command &command) {
dbComplexIdToChainKeysMap.clear();
qComplexIdToChainKeysMap.clear();
alnDbr.close();
delete qCaDbr;
if (!sameDB) {
delete t3DiDbr;
delete tCaDbr;
delete q3DiDbr;
delete qCaDbr;
}
resultWriter.close(true);
return EXIT_SUCCESS;
Expand Down
44 changes: 35 additions & 9 deletions src/strucclustutils/structurerescorediagonal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "QueryMatcher.h"
#include "TMaligner.h"
#include "Coordinate16.h"
#include "LDDT.h"

#ifdef OPENMP
#include <omp.h>
Expand Down Expand Up @@ -124,13 +125,10 @@ Matcher::result_t ungappedAlignStructure(Sequence & qSeqAA, Sequence & qSeq3Di,
dbEndPos = res.endPos + distanceToDiagonal;
}
unsigned int alnLength = Matcher::computeAlnLength(qStartPos, qEndPos, dbStartPos, dbEndPos);
char buffer[256];
char *end = Itoa::i32toa_sse2(alnLength, buffer);
size_t len = end - buffer;
if (par.addBacktrace) {
backtrace=std::string(buffer, len - 1);
backtrace.push_back('M');
backtrace.append(alnLength, 'M');
}

queryCov = SmithWaterman::computeCov(qStartPos, qEndPos, qSeqAA.L);
targetCov = SmithWaterman::computeCov(dbStartPos, dbEndPos, tSeqAA.L);

Expand Down Expand Up @@ -178,9 +176,12 @@ int structureungappedalign(int argc, const char **argv, const Command& command)
}

bool needTMaligner = (par.tmScoreThr > 0);
bool needLDDT = (par.lddtThr > 0);

IndexReader *qcadbr = NULL;
IndexReader *tcadbr = NULL;
if(needTMaligner){
if(needTMaligner || needLDDT){
par.addBacktrace = 1;
qcadbr = new IndexReader(
par.db1,
par.threads,
Expand Down Expand Up @@ -261,7 +262,11 @@ int structureungappedalign(int argc, const char **argv, const Command& command)
TMaligner *tmaligner = NULL;
if(needTMaligner) {
tmaligner = new TMaligner(
std::max(t3DiDbr->sequenceReader->getMaxSeqLen() + 1, t3DiDbr->sequenceReader->getMaxSeqLen() + 1), false, true);
std::max(qdbr3Di.sequenceReader->getMaxSeqLen() + 1, t3DiDbr->sequenceReader->getMaxSeqLen() + 1), false, true);
}
LDDTCalculator *lddtcalculator = NULL;
if(needLDDT) {
lddtcalculator = new LDDTCalculator(qdbr3Di.sequenceReader->getMaxSeqLen() + 1, t3DiDbr->sequenceReader->getMaxSeqLen() + 1);
}

Coordinate16 qcoords;
Expand All @@ -287,12 +292,17 @@ int structureungappedalign(int argc, const char **argv, const Command& command)
unsigned int querySeqLen = qdbr3Di.sequenceReader->getSeqLen(queryId);
qSeq3Di.mapSequence(id, queryKey, querySeq3Di, querySeqLen);
qSeqAA.mapSequence(id, queryKey, querySeqAA, querySeqLen);
if(needTMaligner){
if(needLDDT || needTMaligner){
size_t qId = qcadbr->sequenceReader->getId(queryKey);
char *qcadata = qcadbr->sequenceReader->getData(qId, thread_idx);
size_t qCaLength = qcadbr->sequenceReader->getEntryLen(qId);
float* queryCaData = qcoords.read(qcadata, qSeq3Di.L, qCaLength);
tmaligner->initQuery(queryCaData, &queryCaData[qSeq3Di.L], &queryCaData[qSeq3Di.L+qSeq3Di.L], NULL, qSeq3Di.L);
if(needTMaligner){
tmaligner->initQuery(queryCaData, &queryCaData[qSeq3Di.L], &queryCaData[qSeq3Di.L+qSeq3Di.L], NULL, qSeq3Di.L);
}
if(needLDDT){
lddtcalculator->initQuery(qSeq3Di.L, queryCaData, &queryCaData[qSeq3Di.L], &queryCaData[qSeq3Di.L+qSeq3Di.L]);
}
}
qRevSeq3Di.mapSequence(id, queryKey, querySeq3Di, querySeqLen);
qRevSeqAA.mapSequence(id, queryKey, querySeqAA, querySeqLen);
Expand Down Expand Up @@ -339,6 +349,19 @@ int structureungappedalign(int argc, const char **argv, const Command& command)
continue;
}
}
if(needLDDT){
size_t tId = tcadbr->sequenceReader->getId(res.dbKey);
char *tcadata = tcadbr->sequenceReader->getData(tId, thread_idx);
size_t tCaLength = tcadbr->sequenceReader->getEntryLen(tId);
float* targetCaData = tcoords.read(tcadata, res.dbLen, tCaLength);
LDDTCalculator::LDDTScoreResult lddtres = lddtcalculator->computeLDDTScore(res.dbLen, res.qStartPos, res.dbStartPos,
res.backtrace,
targetCaData, &targetCaData[res.dbLen],
&targetCaData[res.dbLen+res.dbLen]);
if(lddtres.avgLddtScore < par.lddtThr){
continue;
}
}

if (Alignment::checkCriteria(res, isIdentity, par.evalThr, par.seqIdThr, par.alnLenThr, par.covMode, par.covThr)) {
alignmentResult.emplace_back(res);
Expand All @@ -365,6 +388,9 @@ int structureungappedalign(int argc, const char **argv, const Command& command)
if(needTMaligner){
delete tmaligner;
}
if(needLDDT){
delete lddtcalculator;
}
}

free(tinySubMatAA);
Expand Down

0 comments on commit 3fe1f9e

Please sign in to comment.