Skip to content

Commit

Permalink
Reduce the reads sampled for k-mer statistics to 1 in every 50, as FastQC does
Browse files (browse the repository at this point in the history)
  • Loading branch information
guilhermesena1 committed Sep 9, 2021
1 parent e115b55 commit 7426ae2
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 2 deletions.
3 changes: 2 additions & 1 deletion src/Module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1999,7 +1999,7 @@ ModuleKmerContent::summarize_module(FastqStats &stats) {
stats.kmer_count[(i << Constants::bit_shift_kmer) | kmer];

expected_count = pos_kmer_count[i] / dividend;
obs_exp_ratio = observed_count / expected_count;
obs_exp_ratio = (expected_count > 0) ? (observed_count / expected_count) : 0;

if (i == 0 || obs_exp_ratio > obs_exp_max[kmer]) {
obs_exp_max[kmer] = obs_exp_ratio;
Expand Down Expand Up @@ -2059,6 +2059,7 @@ ModuleKmerContent::make_html_data() {
size_t xlim = 0;
for (size_t i = 0; i < lim; ++i)
xlim = max(xlim, where_obs_exp_is_max[kmers_to_report[i].first]);
xlim += kmer_size;

for (size_t i = 0; i < lim; ++i) {
const size_t kmer = kmers_to_report[i].first;
Expand Down
5 changes: 4 additions & 1 deletion src/StreamReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ StreamReader::StreamReader(FalcoConfig &config,

// keep track of which reads to do tile
next_tile_read = 0;
next_kmer_read = 0;
do_tile_read = true;

// Subclasses will use this to deflate if necessary
Expand Down Expand Up @@ -256,7 +257,7 @@ StreamReader::process_sequence_base_from_buffer(FastqStats &stats) {
cur_kmer = ((cur_kmer << Constants::bit_shift_base) | base_ind);

// registers k-mer if seen at least k nucleotides since the last n
if (do_kmer && (num_bases_after_n >= Constants::kmer_size)) {
if (do_kmer && do_kmer_read && (num_bases_after_n >= Constants::kmer_size)) {

stats.kmer_count[(read_pos << Constants::bit_shift_kmer)
| (cur_kmer & Constants::kmer_mask)]++;
Expand Down Expand Up @@ -354,6 +355,7 @@ StreamReader::read_sequence_line(FastqStats &stats) {
num_bases_after_n = 1;
still_in_buffer = true;
next_truncation = 100;
do_kmer_read = (stats.num_reads == next_kmer_read);

/*********************************************************/
/********** THIS LOOP MUST BE ALWAYS OPTIMIZED ***********/
Expand Down Expand Up @@ -537,6 +539,7 @@ StreamReader::postprocess_fastq_record(FastqStats &stats) {
next_tile_read += num_reads_for_tile;
}
}
next_kmer_read += do_kmer_read*num_reads_for_kmer;
}

/*******************************************************/
Expand Down
2 changes: 2 additions & 0 deletions src/StreamReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,14 @@ class StreamReader{

// keep track of reads for which to do kmer and tile count
static const size_t num_reads_for_tile = 10;
static const size_t num_reads_for_kmer = 50;

bool continue_storing_sequences;
bool do_kmer_read;
bool do_tile_read;

size_t next_tile_read;
size_t next_kmer_read;

// Whether or not we have passed the buffer while reading and need to allocate
// more space / use dynamically allocated space to process the base
Expand Down

0 comments on commit 7426ae2

Please sign in to comment.