forked from DaehwanKimLab/hisat2
-
Notifications
You must be signed in to change notification settings - Fork 6
/
aligner_sw_nuc.h
262 lines (234 loc) · 7.04 KB
/
aligner_sw_nuc.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
/*
* Copyright 2011, Ben Langmead <[email protected]>
*
* This file is part of Bowtie 2.
*
* Bowtie 2 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Bowtie 2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ALIGNER_SW_NUC_H_
#define ALIGNER_SW_NUC_H_
#include <stdint.h>
#include "aligner_sw_common.h"
#include "aligner_result.h"
/**
* Encapsulates a backtrace stack frame. Includes enough information that we
* can "pop" back up to this frame and choose to make a different backtracking
* decision. The information included is:
*
* 1. The mask at the decision point. When we first move through the mask and
* when we backtrack to it, we're careful to mask out the bit corresponding
* to the path we're taking. When we move through it after removing the
* last bit from the mask, we're careful to pop it from the stack.
* 2. The sizes of the edit lists. When we backtrack, we resize the lists back
* down to these sizes to get rid of any edits introduced since the branch
* point.
*/
struct DpNucFrame {
/**
* Initialize a new DpNucFrame stack frame.
*/
void init(
size_t nedsz_,
size_t aedsz_,
size_t celsz_,
size_t row_,
size_t col_,
size_t gaps_,
size_t readGaps_,
size_t refGaps_,
AlnScore score_,
int ct_)
{
nedsz = nedsz_;
aedsz = aedsz_;
celsz = celsz_;
row = row_;
col = col_;
gaps = gaps_;
readGaps = readGaps_;
refGaps = refGaps_;
score = score_;
ct = ct_;
}
size_t nedsz; // size of the nucleotide edit list at branch (before
// adding the branch edit)
size_t aedsz; // size of ambiguous nucleotide edit list at branch
size_t celsz; // size of cell-traversed list at branch
size_t row; // row of cell where branch occurred
size_t col; // column of cell where branch occurred
size_t gaps; // number of gaps before branch occurred
size_t readGaps; // number of read gaps before branch occurred
size_t refGaps; // number of ref gaps before branch occurred
AlnScore score; // score where branch occurred
int ct; // table type (oall, rdgap or rfgap)
};
enum {
BT_CAND_FATE_SUCCEEDED = 1,
BT_CAND_FATE_FAILED,
BT_CAND_FATE_FILT_START, // skipped b/c starting cell already explored
BT_CAND_FATE_FILT_DOMINATED, // skipped b/c it was dominated
BT_CAND_FATE_FILT_SCORE // skipped b/c score not interesting anymore
};
/**
* Encapsulates a cell that we might want to backtrace from.
*/
struct DpBtCandidate {
DpBtCandidate() { reset(); }
DpBtCandidate(size_t row_, size_t col_, TAlScore score_) {
init(row_, col_, score_);
}
void reset() { init(0, 0, 0); }
void init(size_t row_, size_t col_, TAlScore score_) {
row = row_;
col = col_;
score = score_;
// 0 = invalid; this should be set later according to what happens
// before / during the backtrace
fate = 0;
}
/**
* Return true iff this candidate is (heuristically) dominated by the given
* candidate. We say that candidate A dominates candidate B if (a) B is
* somewhere in the N x N square that extends up and to the left of A,
* where N is an arbitrary number like 20, and (b) B's score is <= than
* A's.
*/
inline bool dominatedBy(const DpBtCandidate& o) {
const size_t SQ = 40;
size_t rowhi = row;
size_t rowlo = o.row;
if(rowhi < rowlo) swap(rowhi, rowlo);
size_t colhi = col;
size_t collo = o.col;
if(colhi < collo) swap(colhi, collo);
return (colhi - collo) <= SQ &&
(rowhi - rowlo) <= SQ;
}
/**
* Return true if this candidate is "greater than" (should be considered
* later than) the given candidate.
*/
bool operator>(const DpBtCandidate& o) const {
if(score < o.score) return true;
if(score > o.score) return false;
if(row < o.row ) return true;
if(row > o.row ) return false;
if(col < o.col ) return true;
if(col > o.col ) return false;
return false;
}
/**
* Return true if this candidate is "less than" (should be considered
* sooner than) the given candidate.
*/
bool operator<(const DpBtCandidate& o) const {
if(score > o.score) return true;
if(score < o.score) return false;
if(row > o.row ) return true;
if(row < o.row ) return false;
if(col > o.col ) return true;
if(col < o.col ) return false;
return false;
}
/**
* Return true if this candidate equals the given candidate.
*/
bool operator==(const DpBtCandidate& o) const {
return row == o.row &&
col == o.col &&
score == o.score;
}
bool operator>=(const DpBtCandidate& o) const { return !((*this) < o); }
bool operator<=(const DpBtCandidate& o) const { return !((*this) > o); }
#ifndef NDEBUG
/**
* Check internal consistency.
*/
bool repOk() const {
assert(VALID_SCORE(score));
return true;
}
#endif
size_t row; // cell row
size_t col; // cell column w/r/t LHS of rectangle
TAlScore score; // score fo alignment
int fate; // flag indicating whether we succeeded, failed, skipped
};
template <typename T>
class NBest {
public:
NBest<T>() { nelt_ = nbest_ = n_ = 0; }
bool inited() const { return nelt_ > 0; }
void init(size_t nelt, size_t nbest) {
nelt_ = nelt;
nbest_ = nbest;
elts_.resize(nelt * nbest);
ncur_.resize(nelt);
ncur_.fill(0);
n_ = 0;
}
/**
* Add a new result to bin 'elt'. Where it gets prioritized in the list of
* results in that bin depends on the result of operator>.
*/
bool add(size_t elt, const T& o) {
assert_lt(elt, nelt_);
const size_t ncur = ncur_[elt];
assert_leq(ncur, nbest_);
n_++;
for(size_t i = 0; i < nbest_ && i <= ncur; i++) {
if(o > elts_[nbest_ * elt + i] || i >= ncur) {
// Insert it here
// Move everyone from here on down by one slot
for(int j = (int)ncur; j > (int)i; j--) {
if(j < (int)nbest_) {
elts_[nbest_ * elt + j] = elts_[nbest_ * elt + j - 1];
}
}
elts_[nbest_ * elt + i] = o;
if(ncur < nbest_) {
ncur_[elt]++;
}
return true;
}
}
return false;
}
/**
* Return true iff there are no solutions.
*/
bool empty() const {
return n_ == 0;
}
/**
* Dump all the items in our payload into the given EList.
*/
template<typename TList>
void dump(TList& l) const {
if(empty()) return;
for(size_t i = 0; i < nelt_; i++) {
assert_leq(ncur_[i], nbest_);
for(size_t j = 0; j < ncur_[i]; j++) {
l.push_back(elts_[i * nbest_ + j]);
}
}
}
protected:
size_t nelt_;
size_t nbest_;
EList<T> elts_;
EList<size_t> ncur_;
size_t n_; // total # results added
};
#endif /*def ALIGNER_SW_NUC_H_*/