Skip to content

Commit

Permalink
reorganizing constants, getting rid of template.html file
Browse files Browse the repository at this point in the history
  • Loading branch information
guilhermesena1 committed Dec 13, 2019
1 parent 99aa91c commit 8c66bdc
Show file tree
Hide file tree
Showing 11 changed files with 643 additions and 553 deletions.
483 changes: 0 additions & 483 deletions Configuration/template.html

This file was deleted.

494 changes: 484 additions & 10 deletions src/FalcoConfig.cpp

Large diffs are not rendered by default.

72 changes: 63 additions & 9 deletions src/FalcoConfig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,76 @@

#include "aux.hpp"

/*************************************************************
******************** ALL MAGIC NUMBERS **********************
*************************************************************/
namespace Constants {
  // Base-2 logarithm of a power of two, usable in constant expressions so
  // the result can serve as a bit-shift amount for fast index arithmetic.
  //
  // Returns log2(v) when v has exactly one bit set, and 0 otherwise
  // (including v == 0). Each mask below covers the bit positions whose
  // index has a 0 in one binary digit; when the single set bit of v falls
  // inside a mask, that digit's weight is subtracted from 63, which leaves
  // the index of the set bit.
  //
  // Written as a single return statement so it stays valid under the
  // C++11 constexpr rules.
  constexpr size_t
  log2exact(size_t v) {
    return (v != 0 && (v & (v - 1)) == 0) ?
      (63 -
       ((v & 0x00000000FFFFFFFF) ? 32 : 0) -
       ((v & 0x0000FFFF0000FFFF) ? 16 : 0) -
       ((v & 0x00FF00FF00FF00FF) ?  8 : 0) -
       ((v & 0x0F0F0F0F0F0F0F0F) ?  4 : 0) -
       ((v & 0x3333333333333333) ?  2 : 0) -
       ((v & 0x5555555555555555) ?  1 : 0)) : 0;
  }

  // Length of the k-mers counted along the reads.
  static constexpr size_t kmer_size = 7;

  // Maximum number of adapters (must be a power of 2, see
  // bit_shift_adapter below).
  static constexpr size_t max_adapters = 128;

  // number of bases for static allocation.
  static constexpr size_t num_static_bases = 500;

  // Value to subtract from quality characters to get the actual quality
  // value: the ascii code of the lowest quality.
  static constexpr size_t quality_zero = 33;

  // Number of slots reserved for quality values (must be a power of 2).
  // Illumina gives qualities from 0 to 40; the tables are sized at 128.
  // A power of two is used to avoid double pointer jumps and to get
  // indices with bit shifts.
  static constexpr size_t num_quality_values = 128;

  // How many possible nucleotides (must be power of 2!)
  // A = 00, C = 01, T = 10, G = 11
  static constexpr size_t num_nucleotides = 4;

  /************* DUPLICATION ESTIMATES *************/
  // Number of unique sequences to see before stopping counting sequences.
  // Spelled as an integer literal so no double -> size_t conversion occurs.
  static constexpr size_t unique_reads_stop_counting = 100000;

  // Maximum read length to store the entire read in memory
  static constexpr size_t unique_reads_max_length = 75;

  // Prefix size to cut if read length exceeds the value above
  static constexpr size_t unique_reads_truncate = 50;

  /****Bit shifts as instructions for the std::arrays***/
  // for matrices that count stats per nucleotide
  static constexpr size_t bit_shift_base = log2exact(num_nucleotides);

  // for matrices that count stats for quality value
  static constexpr size_t bit_shift_quality = log2exact(num_quality_values);

  // bit shift for adapters, log2(128) = 7
  static constexpr size_t bit_shift_adapter = log2exact(max_adapters);

  // we shift 14 bits when reading a kmer, two bits per base
  static constexpr size_t bit_shift_kmer = 2 * kmer_size;

  // mask to get only the first 2*k bits of the sliding window
  static constexpr size_t kmer_mask = (size_t(1) << bit_shift_kmer) - 1;
}  // namespace Constants

/*************************************************************
******************** CUSTOM CONFIGURATION *******************
*************************************************************/

// config from options, constants, magic numbers, etc
struct FalcoConfig {

FalcoConfig(); // set magic defaults

/************************************************************
*************** MY UNIVERSAL CONSTANTS *********************
************************************************************/
// threshold for a sequence to be considered poor quality
size_t kPoorQualityThreshold;

/************************************************************
*************** FASTQC OPTION PARSER************************
************************************************************/
Expand All @@ -50,12 +105,11 @@ struct FalcoConfig {
bool quiet;
size_t min_length; // lower limit in sequence length to be shown in report
size_t threads; // number of threads to read multiple files in parallel
size_t kmer_size; // kmer size
std::string format; // force file format
std::string contaminants_file; // custom contaminants file
std::string adapters_file; // adapters file
std::string limits_file; // file with limits and options and custom analyses
std::string html_file; // file with limits and options and custom analyses
static const std::string html_template; // the html for the template
std::string tmpdir; // dir for temp files when generating report images

// config on how to handle reads
Expand Down
11 changes: 3 additions & 8 deletions src/FastqStats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,21 +146,16 @@ struct FastqStats {
static const size_t kBitShiftQuality = log2exact(kNumQualityValues);

/************ KMER CONSTANTS **********/
// Kmer size given as input
static const size_t kmer_size = 7;

// we shift 14 bits when reading a kmer, two bits per base
static const size_t kBitShiftKmer = 2 * kmer_size;
static const size_t kBitShiftKmer = 2 * Constants::kmer_size;

// mask to get only the first 2*k bits of the sliding window
static const size_t kmer_mask = (1ll << (2*kmer_size)) - 1;
static const size_t kmer_mask = (1ll << (2*Constants::kmer_size)) - 1;

/************ ADAPTER CONSTANTS **********/
// Maximum number of adapters
static const size_t max_adapters = 128;

// bit shift for adapters, log2(128) = 7
static const size_t kBitShiftAdapter = log2exact(max_adapters);
static const size_t kBitShiftAdapter = log2exact(Constants::max_adapters);


public:
Expand Down
13 changes: 2 additions & 11 deletions src/HtmlMaker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,7 @@ HtmlMaker::put_file_details(const FalcoConfig &falco_config) {
put_data("{{date}}", time_fmt);
}

HtmlMaker::HtmlMaker(string html_template_path) {
html_boilerplate = "";
ifstream in(html_template_path);
if (!in) {
throw runtime_error("HTML layout not found: " + html_template_path);
}

// pass the whole source code template to a string
string line;
while (getline(in, line))
html_boilerplate += line + "\n";
// Starts every report from the HTML template stored in
// FalcoConfig::html_template.
HtmlMaker::HtmlMaker() : html_boilerplate(FalcoConfig::html_template) {}

4 changes: 2 additions & 2 deletions src/HtmlMaker.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
/*******************************************************/
class HtmlMaker {
public:
explicit HtmlMaker(std::string html_template_path);
std::string html_boilerplate;
HtmlMaker();
// Fill data from module
void put_data(const std::string &placeholder, const std::string &data);

Expand All @@ -38,6 +39,5 @@ class HtmlMaker {

// Put file details and date
void put_file_details(const FalcoConfig &falco_config);
std::string html_boilerplate;
};
#endif
2 changes: 1 addition & 1 deletion src/Module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1815,7 +1815,7 @@ Module(ModuleKmerContent::module_name) {

void
ModuleKmerContent::summarize_module(const FastqStats &stats) {
kmer_size = stats.kmer_size;
kmer_size = Constants::kmer_size;

// 4^kmer size
num_kmers = (1 << (2 * kmer_size));
Expand Down
33 changes: 17 additions & 16 deletions src/StreamReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ using std::array;
/****************************************************/

// function to turn a vector into array for adapter hashes and fast lookup
array<size_t, FastqStats::max_adapters>
array<size_t, Constants::max_adapters>
make_adapters(const vector<size_t> &adapter_hashes) {
if (adapter_hashes.size() > FastqStats::max_adapters)
if (adapter_hashes.size() > Constants::max_adapters)
throw runtime_error("Number of adapters is larger than 128, which hinders "
"visualziation and speed of falco. Please keep it to "
"under 128");

array<size_t, FastqStats::max_adapters> ans;
array<size_t, Constants::max_adapters> ans;
for (size_t i = 0; i < adapter_hashes.size(); ++i)
ans[i] = adapter_hashes[i];

Expand Down Expand Up @@ -248,17 +248,17 @@ StreamReader::process_sequence_base_from_buffer(FastqStats &stats) {
// increments basic statistic counts
cur_gc_count += (base_ind & 1);
stats.base_count[
(read_pos << stats.kBitShiftNucleotide) | base_ind]++;
(read_pos << Constants::bit_shift_base) | base_ind]++;

if (do_sliding_window) {
// Update k-mer sequence
cur_kmer = ((cur_kmer << stats.kBitShiftNucleotide) | base_ind);
cur_kmer = ((cur_kmer << Constants::bit_shift_base) | base_ind);

// registers k-mer if seen at least k nucleotides since the last n
if (do_kmer && (num_bases_after_n == stats.kmer_size)) {
if (do_kmer && (num_bases_after_n == Constants::kmer_size)) {

stats.kmer_count[(read_pos << stats.kBitShiftKmer)
| (cur_kmer & stats.kmer_mask)]++;
// The low bits of the index are the k-mer itself, so the sliding window is
// masked with kmer_mask (the low 2*k bits); bit_shift_kmer is a shift
// amount, not a mask.
stats.kmer_count[(read_pos << Constants::bit_shift_kmer)
                 | (cur_kmer & Constants::kmer_mask)]++;
stats.pos_kmer_count[read_pos]++;
}

Expand All @@ -267,7 +267,8 @@ StreamReader::process_sequence_base_from_buffer(FastqStats &stats) {
cur_kmer &= adapter_mask;
for (i = 0; i != num_adapters; ++i) {
if (cur_kmer == adapters[i]) {
stats.pos_adapter_count[(read_pos << stats.kBitShiftAdapter) | i]++;
stats.pos_adapter_count[(read_pos << Constants::bit_shift_adapter)
| i]++;
}
}
}
Expand All @@ -293,7 +294,7 @@ StreamReader::process_sequence_base_from_leftover(FastqStats &stats) {

// increments basic statistic counts
cur_gc_count += (base_ind & 1);
stats.long_base_count[(leftover_ind << stats.kBitShiftNucleotide)
stats.long_base_count[(leftover_ind << Constants::bit_shift_base)
| base_ind]++;

// WE WILL NOT DO KMER STATS OUTSIDE OF BUFFER
Expand Down Expand Up @@ -413,7 +414,7 @@ inline void
StreamReader::process_quality_base_from_buffer(FastqStats &stats) {
// Average quality in position
stats.position_quality_count[
(read_pos << stats.kBitShiftQuality) | quality_value
(read_pos << Constants::bit_shift_quality) | quality_value
]++;

// Tile processing
Expand All @@ -431,7 +432,7 @@ inline void
StreamReader::process_quality_base_from_leftover(FastqStats &stats) {
// Average quality in position
stats.long_position_quality_count[
(leftover_ind << stats.kBitShiftQuality) | quality_value]++;
(leftover_ind << Constants::bit_shift_quality) | quality_value]++;

// Tile processing
if (!tile_ignore) {
Expand Down Expand Up @@ -463,7 +464,7 @@ StreamReader::read_quality_line(FastqStats &stats) {
get_base_from_buffer();

// Converts quality ascii to zero-based
quality_value = *cur_char - stats.kBaseQuality;
quality_value = *cur_char - Constants::quality_zero;

// Fast bases from buffer
if (still_in_buffer) {
Expand Down Expand Up @@ -496,11 +497,11 @@ inline void
StreamReader::postprocess_fastq_record(FastqStats &stats) {
if (do_sequence_hash) {
// if reads are longer than 75 bp, truncate to the first 50
if (read_pos <= stats.kDupReadMaxSize) {
if (read_pos <= Constants::unique_reads_max_length) {
buffer[read_pos] = '\0';
}
else {
buffer[stats.kDupReadTruncateSize] = '\0';
buffer[Constants::unique_reads_truncate] = '\0';
}

sequence_to_hash = string(buffer);
Expand All @@ -512,7 +513,7 @@ StreamReader::postprocess_fastq_record(FastqStats &stats) {
++stats.num_unique_seen;

// if we reached the cutoff of 100k, stop storing
if (stats.num_unique_seen == stats.kDupUniqueCutoff) {
if (stats.num_unique_seen == Constants::unique_reads_stop_counting) {
continue_storing_sequences = false;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/StreamReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ class StreamReader{
const size_t num_adapters;
const size_t adapter_size;
const size_t adapter_mask;
const std::array<size_t, FastqStats::max_adapters> adapters;
const std::array<size_t, Constants::max_adapters> adapters;

/************ FUNCTIONS TO PROCESS READS AND BASES ***********/
// gets and puts bases from and to buffer
Expand Down
10 changes: 6 additions & 4 deletions src/falco.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,20 @@ write_results(const FalcoConfig &falco_config,
// Here we open the full text summary
ofstream qc_data_txt;
if (!skip_text) {

string qc_data_file = falco_config.filename;
qc_data_file = outdir + "/fastqc_data.txt";
qc_data_txt.open(qc_data_file.c_str(), std::ofstream::binary);

if (!falco_config.quiet)
log_process("Writing text report to " + qc_data_file);

// put header
qc_data_txt << "##Falco\t0.1\n";
}

// Here we open the html ostream and maker object
HtmlMaker html_maker(falco_config.html_file);
HtmlMaker html_maker = HtmlMaker();
ofstream html;
if (!skip_html) {
// Decide html filename based on input
Expand Down Expand Up @@ -358,8 +363,6 @@ int main(int argc, const char **argv) {
// The limits option is stored in limits_file, not contaminants_file.
opt_parse.add_opt("-limits", 'l',
                  "Non-default file with limits and warn/fail criteria",
                  false, falco_config.limits_file);
opt_parse.add_opt("-kmer", 'k', "k-mer size (default = 7, max = 10)", false,
falco_config.kmer_size);
opt_parse.add_opt("-skip-text", 'T', "Skip generating text file "
"(Default = false)", false, skip_text);
opt_parse.add_opt("-skip-html", 'H', "Skip generating HTML file "
Expand Down Expand Up @@ -500,7 +503,6 @@ int main(int argc, const char **argv) {
}

// Write results
log_process("Writing results");
write_results(falco_config, stats, skip_text, skip_html,
skip_short_summary, cur_outdir);

Expand Down
Loading

0 comments on commit 8c66bdc

Please sign in to comment.