Skip to content

Commit 2e1f2f3

Browse files
authored
Merge pull request #31 from PROBIC/refactor-input-reading
Refactor input reading
2 parents c537327 + 1ae8a17 commit 2e1f2f3

8 files changed

+356
-86
lines changed

CMakeLists.txt

+12-39
Original file line numberDiff line numberDiff line change
@@ -232,45 +232,18 @@ else()
232232
set_target_properties(alignment-writer PROPERTIES EXCLUDE_FROM_ALL 1)
233233
set(CMAKE_ALIGNMENT_WRITER_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/alignment-writer/include)
234234
set(CMAKE_ALIGNMENT_WRITER_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/lib/libalignment-writer.a)
235+
set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/alignment-writer/external/BitMagic-7.12.3/src/)
235236
endif()
236237
include_directories(${CMAKE_ALIGNMENT_WRITER_HEADERS})
237238
target_link_libraries(mSWEEP ${CMAKE_ALIGNMENT_WRITER_LIBRARY})
238239

239-
## telescope
240-
if (DEFINED CMAKE_TELESCOPE_LIBRARY AND DEFINED CMAKE_TELESCOPE_HEADERS)
241-
message(STATUS "telescope headers provided in: ${CMAKE_TELESCOPE_HEADERS}")
242-
message(STATUS "telescope library provided in: ${CMAKE_TELESCOPE_LIBRARY}")
240+
## BitMagic
241+
if (DEFINED CMAKE_BITMAGIC_HEADERS)
242+
message(STATUS "BitMagic headers provided in: ${CMAKE_BITMAGIC_HEADERS}")
243243
else()
244-
FetchContent_Declare(telescope
245-
GIT_REPOSITORY https://github.com/tmaklin/telescope.git
246-
GIT_TAG v0.7.3
247-
PREFIX "external"
248-
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/telescope"
249-
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope"
250-
BUILD_IN_SOURCE 0
251-
CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS}
252-
-D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS}
253-
-D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS}
254-
-D CMAKE_ALIGNMENT_WRITER_HEADERS=${CMAKE_ALIGNMENT_WRITER_HEADERS}
255-
-D CMAKE_ALIGNMENT_WRITER_LIBRARY=${CMAKE_ALIGNMENT_WRITER_LIBRARY}
256-
-D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/external/BitMagic-7.12.3/src
257-
-D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
258-
-D "CMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
259-
-D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
260-
-D "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
261-
-D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
262-
INSTALL_COMMAND ""
263-
)
264-
FetchContent_MakeAvailable(telescope)
265-
add_dependencies(telescope libalignmentwriter)
266-
add_dependencies(mSWEEP telescope)
267-
set_target_properties(telescope PROPERTIES EXCLUDE_FROM_ALL 1)
268-
set(CMAKE_TELESCOPE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/include)
269-
set(CMAKE_TELESCOPE_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/lib/libtelescope.a)
270-
set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/external/BitMagic-7.12.3/src)
244+
message(FATAL_ERROR "Provide BitMagic C++ headers with -DCMAKE_BITMAGIC_HEADERS")
271245
endif()
272-
include_directories(${CMAKE_TELESCOPE_HEADERS} ${CMAKE_BITMAGIC_HEADERS})
273-
target_link_libraries(mSWEEP ${CMAKE_TELESCOPE_LIBRARY})
246+
include_directories(${CMAKE_BITMAGIC_HEADERS})
274247

275248
## seamat
276249
if (DEFINED CMAKE_SEAMAT_HEADERS)
@@ -283,6 +256,7 @@ else()
283256
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/seamat"
284257
BUILD_IN_SOURCE 0
285258
CMAKE_ARGS -D CMAKE_BUILD_TESTS=0
259+
-D CMAKE_BITMAGIC_HEADERS=${CMAKE_BITMAGIC_HEADERS}
286260
BUILD_COMMAND ""
287261
CONFIGURE_COMMAND ""
288262
INSTALL_COMMAND ""
@@ -332,7 +306,7 @@ if (DEFINED CMAKE_MGEMS_LIBRARY AND DEFINED CMAKE_MGEMS_HEADERS)
332306
else()
333307
FetchContent_Declare(mGEMS
334308
GIT_REPOSITORY https://github.com/PROBIC/mGEMS.git
335-
GIT_TAG v1.3.0
309+
GIT_TAG v1.3.3
336310
PREFIX "external"
337311
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS"
338312
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS"
@@ -341,9 +315,8 @@ else()
341315
-D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS}
342316
-D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS}
343317
-D CMAKE_ALIGNMENT_WRITER_HEADERS=${CMAKE_ALIGNMENT_WRITER_HEADERS}
344-
-D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src
318+
-D CMAKE_BITMAGIC_HEADERS=${CMAKE_BITMAGIC_HEADERS}
345319
-D CMAKE_SEAMAT_HEADERS=${CMAKE_SEAMAT_HEADERS}
346-
-D CMAKE_TELESCOPE_HEADERS=${CMAKE_TELESCOPE_HEADERS}
347320
-D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
348321
-D "CMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
349322
-D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
@@ -352,13 +325,13 @@ else()
352325
INSTALL_COMMAND ""
353326
)
354327
FetchContent_MakeAvailable(mGEMS)
355-
add_dependencies(mGEMS telescope libalignmentwriter)
328+
add_dependencies(mGEMS libalignmentwriter)
356329
add_dependencies(mSWEEP libmgems)
357330
set_target_properties(mGEMS PROPERTIES EXCLUDE_FROM_ALL 1)
358-
set(CMAKE_MGEMS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS/include)
331+
set(CMAKE_MGEMS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS/include ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/include)
359332
set(CMAKE_MGEMS_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/lib/libmgems.a)
360333
endif()
361-
target_link_libraries(mSWEEP ${CMAKE_MGEMS_LIBRARY} ${CMAKE_TELESCOPE_LIBRARY} ${CMAKE_ALIGNMENT_WRITER_LIBRARY})
334+
target_link_libraries(mSWEEP ${CMAKE_MGEMS_LIBRARY} ${CMAKE_ALIGNMENT_WRITER_LIBRARY})
362335
include_directories(${CMAKE_MGEMS_HEADERS})
363336

364337
include_directories(

include/Likelihood.hpp

+56-17
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
#define MSWEEP_LIKELIHOOD_HPP
2727

2828
#include "Matrix.hpp"
29-
#include "telescope.hpp"
3029

3130
#include "mSWEEP_openmp_config.hpp"
3231

@@ -41,6 +40,7 @@
4140
#include <algorithm>
4241
#include <memory>
4342

43+
#include "mSWEEP_alignment.hpp"
4444
#include "Grouping.hpp"
4545

4646
namespace mSWEEP {
@@ -106,52 +106,91 @@ class LL_WOR21 : public Likelihood<T> {
106106
return ll_mat;
107107
}
108108

109-
void fill_ll_mat(const telescope::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
109+
void fill_ll_mat(const mSWEEP::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
110110
size_t num_ecs = alignment.n_ecs();
111+
size_t n_targets = alignment.get_n_targets();
112+
113+
size_t n_threads = 1;
114+
#if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1
115+
#pragma omp parallel
116+
{
117+
n_threads = omp_get_num_threads();
118+
}
119+
#endif
120+
121+
// This double loop is currently the slowest part in the input reading
122+
std::vector<bm::sparse_vector<V, bm::bvector<>>> local_counts(n_threads);
123+
#pragma omp parallel for schedule(static)
124+
for (size_t i = 0; i < num_ecs; ++i) {
125+
for (size_t j = 0; j < n_targets; ++j) {
126+
if (alignment(i, j)) {
127+
#if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1
128+
local_counts[omp_get_thread_num()].inc((size_t)((size_t)alignment.get_groups()[j]*num_ecs) + i);
129+
#else
130+
local_counts[0].inc((size_t)((size_t)alignment.get_groups()[j]*num_ecs) + i);
131+
#endif
132+
}
133+
}
134+
}
135+
136+
bm::sparse_vector<V, bm::bvector<>> group_counts = std::move(local_counts[0]);
137+
for (size_t i = 1; i < n_threads; ++i) {
138+
group_counts.merge(local_counts[i]);
139+
}
111140

112141
bool mask_groups = min_hits > 0;
113142
this->groups_mask = std::vector<bool>(n_groups, !mask_groups);
114143
std::vector<V> masked_group_sizes;
144+
std::vector<size_t> groups_pos(n_groups, 0);
145+
size_t n_masked_groups = 0;
115146
if (mask_groups) {
116147
std::vector<size_t> group_hit_counts(n_groups, (size_t)0);
117148
// Create mask identifying groups that have at least 1 alignment
118-
for (size_t i = 0; i < num_ecs; ++i) {
119-
for (size_t j = 0; j < n_groups; ++j) {
120-
group_hit_counts[j] += (alignment(j, i) > 0) * alignment.reads_in_ec(i);
149+
#pragma omp parallel for schedule(static) reduction(vec_size_t_plus:group_hit_counts)
150+
for (size_t j = 0; j < n_groups; ++j) {
151+
for (size_t i = 0; i < num_ecs; ++i) {
152+
group_hit_counts[j] += (group_counts[j*num_ecs + i] > 0) * alignment.reads_in_ec(i);
121153
}
122154
}
155+
123156
for (size_t i = 0; i < n_groups; ++i) {
124157
this->groups_mask[i] = groups_mask[i] || (group_hit_counts[i] >= min_hits);
125158
if (this->groups_mask[i]) {
159+
groups_pos[i] = n_masked_groups;
126160
masked_group_sizes.push_back(group_sizes[i]);
161+
++n_masked_groups;
127162
}
128163
}
129164
} else {
130165
masked_group_sizes = group_sizes;
166+
#pragma omp parallel for schedule(static)
167+
for (size_t i = 0; i < n_groups; ++i) {
168+
groups_pos[i] = i;
169+
}
131170
}
132-
size_t n_masked_groups = masked_group_sizes.size();
171+
n_masked_groups = masked_group_sizes.size();
133172

134173
this->update_bb_parameters(masked_group_sizes, n_masked_groups, this->bb_constants);
135174
const seamat::DenseMatrix<T> &precalc_lls_mat = this->precalc_lls(masked_group_sizes, n_masked_groups);
136175

137176
this->log_likelihoods.resize(n_masked_groups, num_ecs, std::log(this->zero_inflation));
138-
for (size_t j = 0; j < num_ecs; ++j) {
139-
size_t groups_pos = 0;
140-
for (size_t i = 0; i < n_groups; ++i) {
177+
178+
#pragma omp parallel for schedule(static)
179+
for (size_t i = 0; i < n_groups; ++i) {
141180
if (this->groups_mask[i]) {
142-
this->log_likelihoods(groups_pos, j) = precalc_lls_mat(groups_pos, alignment(i, j));
143-
++groups_pos;
181+
for (size_t j = 0; j < num_ecs; ++j) {
182+
this->log_likelihoods(groups_pos[i], j) = precalc_lls_mat(groups_pos[i], group_counts[i*num_ecs + j]);
183+
}
144184
}
145-
}
146185
}
147186
}
148187

149-
void fill_ec_counts(const telescope::Alignment &alignment) {
188+
void fill_ec_counts(const mSWEEP::Alignment &alignment) {
150189
// Fill log ec counts.
151190
this->log_ec_counts.resize(alignment.n_ecs(), 0);
152191
#pragma omp parallel for schedule(static)
153192
for (size_t i = 0; i < alignment.n_ecs(); ++i) {
154-
this->log_ec_counts[i] = std::log(alignment.reads_in_ec(i));
193+
this->log_ec_counts[i] = std::log(alignment.reads_in_ec(i));
155194
}
156195
}
157196

@@ -170,14 +209,14 @@ class LL_WOR21 : public Likelihood<T> {
170209
public:
171210
LL_WOR21() = default;
172211

173-
LL_WOR21(const std::vector<V> &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) {
212+
LL_WOR21(const std::vector<V> &group_sizes, const mSWEEP::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) {
174213
this->bb_constants[0] = tol;
175214
this->bb_constants[1] = frac_mu;
176215
this->zero_inflation = _zero_inflation;
177216
this->from_grouped_alignment(alignment, group_sizes, n_groups, min_hits);
178217
}
179218

180-
void from_grouped_alignment(const telescope::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
219+
void from_grouped_alignment(const mSWEEP::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
181220
this->fill_ll_mat(alignment, group_sizes, n_groups, min_hits);
182221
this->fill_ec_counts(alignment);
183222
}
@@ -292,7 +331,7 @@ class LL_WOR21 : public Likelihood<T> {
292331
const std::vector<bool>& groups_considered() const override { return this->groups_mask; };
293332
};
294333
template <typename T>
295-
std::unique_ptr<Likelihood<T>> ConstructAdaptiveLikelihood(const telescope::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) {
334+
std::unique_ptr<Likelihood<T>> ConstructAdaptiveLikelihood(const mSWEEP::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) {
296335
size_t max_group_size = grouping.max_group_size();
297336
size_t n_groups = grouping.get_n_groups();
298337
std::unique_ptr<Likelihood<T>> log_likelihoods;

include/Sample.hpp

+11-11
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
#include <memory>
3434

3535
#include "Matrix.hpp"
36-
#include "telescope.hpp"
36+
#include "mSWEEP_alignment.hpp"
3737

3838
namespace mSWEEP {
3939
class Sample {
@@ -47,7 +47,7 @@ class Sample {
4747
std::vector<double> log_KLDs;
4848

4949
protected:
50-
void count_alignments(const telescope::Alignment &alignment);
50+
void count_alignments(const mSWEEP::Alignment &alignment);
5151

5252
public:
5353
// Virtual functions
@@ -111,7 +111,7 @@ class PlainSample : public Sample {
111111
public:
112112
PlainSample() = default;
113113

114-
PlainSample(const telescope::Alignment &alignment) {
114+
PlainSample(const mSWEEP::Alignment &alignment) {
115115
this->count_alignments(alignment);
116116
}
117117

@@ -132,7 +132,7 @@ class BinningSample : public PlainSample, public Binning {
132132
public:
133133
BinningSample() = default;
134134

135-
BinningSample(const telescope::Alignment &alignment) {
135+
BinningSample(const mSWEEP::Alignment &alignment) {
136136
this->count_alignments(alignment);
137137
this->store_aligned_reads(alignment.get_aligned_reads());
138138
}
@@ -157,19 +157,19 @@ class BootstrapSample : public Sample {
157157
std::vector<std::vector<double>> bootstrap_results;
158158

159159
// Set all variables required to bootstrap the ec_counts later
160-
void init_bootstrap(const telescope::Alignment &alignment);
160+
void init_bootstrap(const mSWEEP::Alignment &alignment);
161161

162162
protected:
163-
void construct(const telescope::Alignment &alignment, const size_t _iters, const int32_t seed, const size_t bootstrap_count=0);
163+
void construct(const mSWEEP::Alignment &alignment, const size_t _iters, const int32_t seed, const size_t bootstrap_count=0);
164164

165165
public:
166166
BootstrapSample() = default;
167167

168168
// Set seed in constructor
169-
BootstrapSample(const telescope::Alignment &alignment, const size_t _iters, const int32_t seed) {
169+
BootstrapSample(const mSWEEP::Alignment &alignment, const size_t _iters, const int32_t seed) {
170170
this->construct(alignment, _iters, seed);
171171
}
172-
BootstrapSample(const telescope::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) {
172+
BootstrapSample(const mSWEEP::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) {
173173
this->construct(alignment, _iters, seed, _bootstrap_count);
174174
}
175175

@@ -191,18 +191,18 @@ class BootstrapSample : public Sample {
191191

192192
class BinningBootstrap : public BootstrapSample, public Binning {
193193
public:
194-
BinningBootstrap(const telescope::Alignment &alignment, const size_t _iters, const int32_t seed) {
194+
BinningBootstrap(const mSWEEP::Alignment &alignment, const size_t _iters, const int32_t seed) {
195195
this->construct(alignment, _iters, seed);
196196
this->store_aligned_reads(alignment.get_aligned_reads());
197197
}
198-
BinningBootstrap(const telescope::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) {
198+
BinningBootstrap(const mSWEEP::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) {
199199
this->construct(alignment, _iters, seed, _bootstrap_count);
200200
this->store_aligned_reads(alignment.get_aligned_reads());
201201
}
202202

203203
};
204204

205-
void ConstructSample(const telescope::Alignment &alignment, const size_t bootstrap_iters, const size_t bootstrap_count, const size_t bootstrap_seed, const bool bin_reads, std::unique_ptr<Sample> &sample);
205+
void ConstructSample(const mSWEEP::Alignment &alignment, const size_t bootstrap_iters, const size_t bootstrap_count, const size_t bootstrap_seed, const bool bin_reads, std::unique_ptr<Sample> &sample);
206206

207207
}
208208

0 commit comments

Comments
 (0)