26
26
#define MSWEEP_LIKELIHOOD_HPP
27
27
28
28
#include " Matrix.hpp"
29
- #include " telescope.hpp"
30
29
31
30
#include " mSWEEP_openmp_config.hpp"
32
31
41
40
#include < algorithm>
42
41
#include < memory>
43
42
43
+ #include " mSWEEP_alignment.hpp"
44
44
#include " Grouping.hpp"
45
45
46
46
namespace mSWEEP {
@@ -106,52 +106,91 @@ class LL_WOR21 : public Likelihood<T> {
106
106
return ll_mat;
107
107
}
108
108
109
- void fill_ll_mat (const telescope ::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
109
+ void fill_ll_mat (const mSWEEP ::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
110
110
size_t num_ecs = alignment.n_ecs ();
111
+ size_t n_targets = alignment.get_n_targets ();
112
+
113
+ size_t n_threads = 1 ;
114
+ #if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1
115
+ #pragma omp parallel
116
+ {
117
+ n_threads = omp_get_num_threads ();
118
+ }
119
+ #endif
120
+
121
+ // This double loop is currently the slowest part in the input reading
122
+ std::vector<bm::sparse_vector<V, bm::bvector<>>> local_counts (n_threads);
123
+ #pragma omp parallel for schedule(static)
124
+ for (size_t i = 0 ; i < num_ecs; ++i) {
125
+ for (size_t j = 0 ; j < n_targets; ++j) {
126
+ if (alignment (i, j)) {
127
+ #if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1
128
+ local_counts[omp_get_thread_num ()].inc ((size_t )((size_t )alignment.get_groups ()[j]*num_ecs) + i);
129
+ #else
130
+ local_counts[0 ].inc ((size_t )((size_t )alignment.get_groups ()[j]*num_ecs) + i);
131
+ #endif
132
+ }
133
+ }
134
+ }
135
+
136
+ bm::sparse_vector<V, bm::bvector<>> group_counts = std::move (local_counts[0 ]);
137
+ for (size_t i = 1 ; i < n_threads; ++i) {
138
+ group_counts.merge (local_counts[i]);
139
+ }
111
140
112
141
bool mask_groups = min_hits > 0 ;
113
142
this ->groups_mask = std::vector<bool >(n_groups, !mask_groups);
114
143
std::vector<V> masked_group_sizes;
144
+ std::vector<size_t > groups_pos (n_groups, 0 );
145
+ size_t n_masked_groups = 0 ;
115
146
if (mask_groups) {
116
147
std::vector<size_t > group_hit_counts (n_groups, (size_t )0 );
117
148
// Create mask identifying groups that have at least 1 alignment
118
- for (size_t i = 0 ; i < num_ecs; ++i) {
119
- for (size_t j = 0 ; j < n_groups; ++j) {
120
- group_hit_counts[j] += (alignment (j, i) > 0 ) * alignment.reads_in_ec (i);
149
+ #pragma omp parallel for schedule(static) reduction(vec_size_t_plus:group_hit_counts)
150
+ for (size_t j = 0 ; j < n_groups; ++j) {
151
+ for (size_t i = 0 ; i < num_ecs; ++i) {
152
+ group_hit_counts[j] += (group_counts[j*num_ecs + i] > 0 ) * alignment.reads_in_ec (i);
121
153
}
122
154
}
155
+
123
156
for (size_t i = 0 ; i < n_groups; ++i) {
124
157
this ->groups_mask [i] = groups_mask[i] || (group_hit_counts[i] >= min_hits);
125
158
if (this ->groups_mask [i]) {
159
+ groups_pos[i] = n_masked_groups;
126
160
masked_group_sizes.push_back (group_sizes[i]);
161
+ ++n_masked_groups;
127
162
}
128
163
}
129
164
} else {
130
165
masked_group_sizes = group_sizes;
166
+ #pragma omp parallel for schedule(static)
167
+ for (size_t i = 0 ; i < n_groups; ++i) {
168
+ groups_pos[i] = i;
169
+ }
131
170
}
132
- size_t n_masked_groups = masked_group_sizes.size ();
171
+ n_masked_groups = masked_group_sizes.size ();
133
172
134
173
this ->update_bb_parameters (masked_group_sizes, n_masked_groups, this ->bb_constants );
135
174
const seamat::DenseMatrix<T> &precalc_lls_mat = this ->precalc_lls (masked_group_sizes, n_masked_groups);
136
175
137
176
this ->log_likelihoods .resize (n_masked_groups, num_ecs, std::log (this ->zero_inflation ));
138
- for ( size_t j = 0 ; j < num_ecs; ++j) {
139
- size_t groups_pos = 0 ;
140
- for (size_t i = 0 ; i < n_groups; ++i) {
177
+
178
+ # pragma omp parallel for schedule(static)
179
+ for (size_t i = 0 ; i < n_groups; ++i) {
141
180
if (this ->groups_mask [i]) {
142
- this ->log_likelihoods (groups_pos, j) = precalc_lls_mat (groups_pos, alignment (i, j));
143
- ++groups_pos;
181
+ for (size_t j = 0 ; j < num_ecs; ++j) {
182
+ this ->log_likelihoods (groups_pos[i], j) = precalc_lls_mat (groups_pos[i], group_counts[i*num_ecs + j]);
183
+ }
144
184
}
145
- }
146
185
}
147
186
}
148
187
149
- void fill_ec_counts (const telescope ::Alignment &alignment) {
188
+ void fill_ec_counts (const mSWEEP ::Alignment &alignment) {
150
189
// Fill log ec counts.
151
190
this ->log_ec_counts .resize (alignment.n_ecs (), 0 );
152
191
#pragma omp parallel for schedule(static)
153
192
for (size_t i = 0 ; i < alignment.n_ecs (); ++i) {
154
- this ->log_ec_counts [i] = std::log (alignment.reads_in_ec (i));
193
+ this ->log_ec_counts [i] = std::log (alignment.reads_in_ec (i));
155
194
}
156
195
}
157
196
@@ -170,14 +209,14 @@ class LL_WOR21 : public Likelihood<T> {
170
209
public:
171
210
LL_WOR21 () = default ;
172
211
173
- LL_WOR21 (const std::vector<V> &group_sizes, const telescope ::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) {
212
+ LL_WOR21 (const std::vector<V> &group_sizes, const mSWEEP ::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) {
174
213
this ->bb_constants [0 ] = tol;
175
214
this ->bb_constants [1 ] = frac_mu;
176
215
this ->zero_inflation = _zero_inflation;
177
216
this ->from_grouped_alignment (alignment, group_sizes, n_groups, min_hits);
178
217
}
179
218
180
- void from_grouped_alignment (const telescope ::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
219
+ void from_grouped_alignment (const mSWEEP ::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
181
220
this ->fill_ll_mat (alignment, group_sizes, n_groups, min_hits);
182
221
this ->fill_ec_counts (alignment);
183
222
}
@@ -292,7 +331,7 @@ class LL_WOR21 : public Likelihood<T> {
292
331
const std::vector<bool >& groups_considered () const override { return this ->groups_mask ; };
293
332
};
294
333
template <typename T>
295
- std::unique_ptr<Likelihood<T>> ConstructAdaptiveLikelihood (const telescope ::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) {
334
+ std::unique_ptr<Likelihood<T>> ConstructAdaptiveLikelihood (const mSWEEP ::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) {
296
335
size_t max_group_size = grouping.max_group_size ();
297
336
size_t n_groups = grouping.get_n_groups ();
298
337
std::unique_ptr<Likelihood<T>> log_likelihoods;
0 commit comments