Skip to content

Commit

Permalink
Added support for seed chunks.
Browse files Browse the repository at this point in the history
  • Loading branch information
bbuchfink committed Jun 30, 2020
1 parent 0df64f1 commit dc781ec
Show file tree
Hide file tree
Showing 17 changed files with 105 additions and 110 deletions.
44 changes: 0 additions & 44 deletions src/align/extend_ungapped.h

This file was deleted.

2 changes: 1 addition & 1 deletion src/align/legacy/query_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <algorithm>
#include "query_mapper.h"
#include "../../data/reference.h"
#include "../extend_ungapped.h"
#include "../../dp/ungapped.h"
#include "../../output/output.h"
#include "../../output/output_format.h"
#include "../../output/daa_write.h"
Expand Down
2 changes: 1 addition & 1 deletion src/basic/basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "sequence.h"
#include "masking.h"

const char* Const::version_string = "0.9.35";
const char* Const::version_string = "0.9.36";
const char* Const::program_name = "diamond";
const char* Const::id_delimiters = " \a\b\f\n\r\t\v\1";

Expand Down
20 changes: 16 additions & 4 deletions src/basic/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
****/

#include <memory>
#include <exception>
#include "../util/command_line_parser.h"
#include "config.h"
#include "../util/util.h"
Expand All @@ -43,6 +44,12 @@ using namespace std;

Config config;

void Config::set_sens(Sensitivity sens) {
if (sensitivity != Sensitivity::FAST)
throw std::runtime_error("Sensitivity switches are mutually exclusive.");
sensitivity = sens;
}

Config::Config(int argc, const char **argv, bool check_io)
{
Command_line_parser parser;
Expand Down Expand Up @@ -233,6 +240,7 @@ Config::Config(int argc, const char **argv, bool check_io)
("seq", 0, "Sequence numbers to display.", seq_no);

double rank_ratio2;
unsigned window;
Options_group deprecated_options("");
deprecated_options.add()
("window", 'w', "window size for local hit search", window)
Expand Down Expand Up @@ -351,7 +359,8 @@ Config::Config(int argc, const char **argv, bool check_io)
("full-sw-len", 0, "", full_sw_len)
("relaxed-evalue-factor", 0, "", relaxed_evalue_factor, 1.0)
("type", 0, "", type)
("raw", 0, "", raw);
("raw", 0, "", raw)
("ultra-sensitive", 0, "", mode_ultra_sensitive);

parser.add(general).add(makedb).add(cluster).add(aligner).add(advanced).add(view_options).add(getseq_options).add(hidden_options).add(deprecated_options);
parser.store(argc, argv, command);
Expand Down Expand Up @@ -520,6 +529,12 @@ Config::Config(int argc, const char **argv, bool check_io)
verbose_stream << "CPU features detected: " << SIMD::features() << endl;
}

sensitivity = Sensitivity::FAST;
if (mode_sensitive) set_sens(Sensitivity::SENSITIVE);
if (mode_more_sensitive) set_sens(Sensitivity::MORE_SENSITIVE);
if (mode_very_sensitive) set_sens(Sensitivity::VERY_SENSITIVE);
if (mode_ultra_sensitive) set_sens(Sensitivity::ULTRA_SENSITIVE);

Translator::init(query_gencode);

if (command == blastx)
Expand Down Expand Up @@ -548,9 +563,6 @@ Config::Config(int argc, const char **argv, bool check_io)
if (query_range_culling && taxon_k != 0)
throw std::runtime_error("--taxon-k is not supported for --range-culling mode.");

if (beta && (lowmem != 1))
throw std::runtime_error("--beta needs -c1.");

log_stream << "MAX_SHAPE_LEN=" << MAX_SHAPE_LEN;
#ifdef SEQ_MASK
log_stream << " SEQ_MASK";
Expand Down
8 changes: 7 additions & 1 deletion src/basic/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <vector>
#include <stdint.h>

enum class Sensitivity { FAST = 0, SENSITIVE = 1, MORE_SENSITIVE = 2, VERY_SENSITIVE = 3, ULTRA_SENSITIVE = 4 };

struct Config
{

Expand Down Expand Up @@ -53,7 +55,6 @@ struct Config
unsigned min_identities2;
double ungapped_xdrop;
int raw_ungapped_xdrop;
unsigned window;
double min_hit_score;
int min_hit_raw_score;
int hit_band;
Expand Down Expand Up @@ -227,6 +228,9 @@ struct Config
double relaxed_evalue_factor;
string type;
bool raw;
bool mode_ultra_sensitive;

Sensitivity sensitivity;

enum {
makedb = 0, blastp = 1, blastx = 2, view = 3, help = 4, version = 5, getseq = 6, benchmark = 7, random_seqs = 8, compare = 9, sort = 10, roc = 11, db_stat = 12, model_sim = 13,
Expand Down Expand Up @@ -301,6 +305,8 @@ struct Config
return padding;
}

void set_sens(Sensitivity sens);

bool mem_buffered() const { return tmpdir == "/dev/shm"; }

template<typename _t>
Expand Down
2 changes: 1 addition & 1 deletion src/basic/const.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ struct Const
{

enum {
build_version = 136,
build_version = 137,
seedp_bits = 10,
seedp = 1<<seedp_bits,
max_seed_weight = 32,
Expand Down
2 changes: 1 addition & 1 deletion src/chaining/greedy_align.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "../basic/sequence.h"
#include "../basic/match.h"
#include "../basic/score_matrix.h"
#include "../align/extend_ungapped.h"
//#include "../align/extend_ungapped.h"
#include "../output/output_format.h"
#include "../dp/hsp_traits.h"
#include "chaining.h"
Expand Down
4 changes: 2 additions & 2 deletions src/data/sequence_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ struct Sequence_set : public String_set<Letter, sequence::DELIMITER, 1>
return max;
}

sequence window_infix(size_t offset, unsigned &left) const
/*sequence window_infix(size_t offset, unsigned &left) const
{
const Letter* begin(this->data(offset));
unsigned n(0);
Expand Down Expand Up @@ -100,7 +100,7 @@ struct Sequence_set : public String_set<Letter, sequence::DELIMITER, 1>
++begin;
const Letter* s(this->data(offset - config.window));
return sequence(s, 2 * config.window, (int)(begin - s));
}
}*/

vector<size_t> partition(unsigned n_part) const
{
Expand Down
10 changes: 3 additions & 7 deletions src/dp/ungapped.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,14 @@ You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
****/

#ifndef UNGAPPED_H_
#define UNGAPPED_H_

#pragma once
#include "../basic/value.h"
#include "../basic/diagonal_segment.h"
#include "comp_based_stats.h"

int xdrop_ungapped(const Letter *query, const Letter *subject, unsigned seed_len, unsigned &delta, unsigned &len);
int xdrop_ungapped(const Letter *query, const Letter *subject, unsigned &delta, unsigned &len);
//int xdrop_ungapped(const Letter *query, const Letter *subject, unsigned seed_len, unsigned &delta, unsigned &len);
//int xdrop_ungapped(const Letter *query, const Letter *subject, unsigned &delta, unsigned &len);
int xdrop_ungapped_right(const Letter *query, const Letter *subject, int &len);
int ungapped_window(const Letter* query, const Letter* subject, int window);
Diagonal_segment xdrop_ungapped(const sequence &query, const Bias_correction &query_bc, const sequence &subject, int qa, int sa);
Diagonal_segment xdrop_ungapped(const sequence &query, const sequence &subject, int qa, int sa);

#endif
4 changes: 2 additions & 2 deletions src/dp/ungapped_align.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "dp.h"
#include "../basic/score_matrix.h"

int xdrop_ungapped(const Letter *query, const Letter *subject, unsigned seed_len, unsigned &delta, unsigned &len)
/*int xdrop_ungapped(const Letter *query, const Letter *subject, unsigned seed_len, unsigned &delta, unsigned &len)
{
int score(0), st(0);
unsigned n(0);
Expand Down Expand Up @@ -73,7 +73,7 @@ int xdrop_ungapped(const Letter *query, const Letter *subject, unsigned seed_len
len = delta + n + seed_len;
return score;
}
}*/

int xdrop_ungapped(const Letter *query, const Letter *subject, unsigned &delta, unsigned &len)
{
Expand Down
9 changes: 2 additions & 7 deletions src/search/collision.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "seed_complexity.h"
#include "finger_print.h"

// #define NO_COLLISION_FILTER

namespace DISPATCH_ARCH {

bool verify_hit(const Letter *query, const Letter *subject, unsigned sid)
Expand All @@ -31,8 +29,8 @@ bool verify_hit(const Letter *query, const Letter *subject, unsigned sid)
if (fq.match(fs) < config.min_identities)
return false;
return true;
unsigned delta, len;
return stage2_ungapped(query, subject, sid, delta, len) >= config.min_ungapped_raw_score;
/*unsigned delta, len;
return stage2_ungapped(query, subject, sid, delta, len) >= config.min_ungapped_raw_score;*/
}

inline bool match_shape_mask(const uint64_t mask, const uint64_t shape_mask)
Expand Down Expand Up @@ -94,9 +92,6 @@ bool is_primary_hit(const Letter *query,
const unsigned sid,
const unsigned len)
{
#ifdef NO_COLLISION_FILTER
return true;
#endif
assert(len > 0 && len <= config.window * 2);
const bool chunked(config.lowmem > 1);
uint64_t mask = reduced_match32(query, subject, len);
Expand Down
29 changes: 21 additions & 8 deletions src/search/left_most.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,32 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.

namespace Search {

static inline bool verify_hit(const Letter* q, const Letter* s, int score_cutoff) {
static inline bool verify_hit(const Letter* q, const Letter* s, int score_cutoff, bool left, uint32_t match_mask, unsigned sid) {
if (config.lowmem > 1) {
if ((shapes[sid].mask_ & match_mask) == shapes[sid].mask_) {
Packed_seed seed;
if (!shapes[sid].set_seed(seed, s))
return false;
if (left && !current_range.lower_or_equal(seed_partition(seed)))
return false;
if (!left && !current_range.lower(seed_partition(seed)))
return false;
}
}
Finger_print fq(q), fs(s);
const unsigned id = fq.match(fs);
return id >= config.min_identities;
/*const sequence query_clipped = Util::Sequence::clip(q - config.ungapped_window, config.ungapped_window * 2, config.ungapped_window);
const int window_left = int(q - query_clipped.data()), window = (int)query_clipped.length();
const int ungapped = ungapped_window(query_clipped.data(), s - window_left, window);
std::cerr << "previous hit ungapped=" << ungapped << endl;
return ungapped > score_cutoff;*/
}

static inline bool verify_hits(uint32_t mask, const Letter* q, const Letter* s, int score_cutoff) {
static inline bool verify_hits(uint32_t mask, const Letter* q, const Letter* s, int score_cutoff, bool left, uint32_t match_mask, unsigned sid) {
int shift = 0;
while (mask != 0) {
int i = ctz(mask);
if (verify_hit(q + i + shift, s + i + shift, score_cutoff))
if (verify_hit(q + i + shift, s + i + shift, score_cutoff, left, match_mask >> (i + shift), sid))
return true;
mask >>= i + 1;
shift += i + 1;
Expand All @@ -64,6 +74,7 @@ static inline bool left_most_filter(const sequence &query,
int score_cutoff)
{
constexpr int WINDOW_LEFT = 16, WINDOW_RIGHT = 32;
const bool chunked = config.lowmem > 1;

int d = std::max(seed_offset - WINDOW_LEFT, 0), window_left = std::min(WINDOW_LEFT, seed_offset);
const Letter *q = query.data() + d, *s = subject + d;
Expand Down Expand Up @@ -91,16 +102,18 @@ static inline bool left_most_filter(const sequence &query,

const uint32_t left_hit = context.current_matcher.hit(match_mask_left, len_left) & query_mask_left;

if (first_shape)
return left_hit == 0 || !verify_hits(left_hit, q, s, score_cutoff);
if (first_shape && !chunked)
return left_hit == 0 || !verify_hits(left_hit, q, s, score_cutoff, true, match_mask_left, shape_id);

const uint32_t len_right = window - window_left - 1,
match_mask_right = match_mask >> (window_left + 1),
query_mask_right = query_seed_mask >> (window_left + 1);

const uint32_t right_hit = context.previous_matcher.hit(match_mask_right, len_right) & query_mask_right;
const PatternMatcher& right_matcher = chunked ? context.current_matcher : context.previous_matcher;
const uint32_t right_hit = right_matcher.hit(match_mask_right, len_right) & query_mask_right;

return (left_hit == 0 || !verify_hits(left_hit, q, s, score_cutoff)) && (right_hit == 0 || !verify_hits(right_hit, q + window_left + 1, s + window_left + 1, score_cutoff));
return (left_hit == 0 || !verify_hits(left_hit, q, s, score_cutoff, true, match_mask_left, shape_id))
&& (right_hit == 0 || !verify_hits(right_hit, q + window_left + 1, s + window_left + 1, score_cutoff, false, match_mask_right, shape_id));
}

}
5 changes: 0 additions & 5 deletions src/search/search.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,6 @@ struct Stage1_hit
unsigned q, s;
};

static inline int stage2_ungapped(const Letter *query, const Letter *subject, unsigned sid, unsigned &delta, unsigned &len)
{
return xdrop_ungapped(query, subject, shapes[sid].length_, delta, len);
}

void search_shape(unsigned sid, unsigned query_block, char *query_buffer, char *ref_buffer);
bool use_single_indexed(double coverage, size_t query_letters, size_t ref_letters);
void setup_search_params(pair<size_t, size_t> query_len_bounds, size_t chunk_db_letters);
Expand Down
Loading

0 comments on commit dc781ec

Please sign in to comment.