Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
JJChenCharly committed Jun 22, 2023
1 parent 879f92e commit 09d15ac
Show file tree
Hide file tree
Showing 8 changed files with 23 additions and 19 deletions.
2 changes: 1 addition & 1 deletion src/Blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import List
import copy

__version__ = "0.2.2"
__version__ = "0.2.3"

class BLAST:
def __init__(self,
Expand Down
4 changes: 2 additions & 2 deletions src/Step1_preparation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ int main(int argc, char** argv) {
exit(0);
}

std::ofstream pre_res_output(pre_res_path, std::ios::trunc);
std::ofstream pre_res_output(pre_res_path, std::ios::trunc); // trunc = discard any existing contents (overwrite)

int start_c = 10000;
int start_c = 0;
std::string strain_naam;

for (const auto& entry : fs::directory_iterator(raw_annotated_dir_path)) { // range iteration to get abs path
Expand Down
2 changes: 1 addition & 1 deletion src/Step2_simple_derep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ int main(int argc, char** argv) {

std::string a_line, info;
std::string a_seq = "";
int seq_id = 10000; // start
int seq_id = 0; // start

std::unordered_set<std::string> added_seq = {"added_seq"};

Expand Down
18 changes: 11 additions & 7 deletions src/Step3_pre_cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ int main(int argc, char** argv) {
if (!(fs::exists(parent_dereped_cated_fasta_pth))) {
std::cerr << "Error: parent path provided to '-d or --concatenated_fasta' does not exist. 路径不存在\n";
exit(0);
}
}
else if (!(fs::exists(parent_nr_dir))) {
std::cerr << "Error: parent path provided to '-n or --nr_genomes' does not exist. 路径不存在\n";
exit(0);
Expand Down Expand Up @@ -119,14 +119,17 @@ int main(int argc, char** argv) {

// first
getline (a_dereped_fasta, a_line);
seq_id = a_line.substr(1, 11);
info = a_line.substr(13);

size_t space_indx = a_line.find(" ");
seq_id = a_line.substr(1, space_indx - 1);
info = a_line.substr(space_indx + 1);

while (getline (a_dereped_fasta, a_line)) {

if (a_line[0] == '>') { // if this line is a record header (starts with '>')
// save id info (a previous info)
id_info << seq_id + "\t" + info + "\n";

// operate on previous SeqRecord using previous seq_id
if (pre_cluster.count(a_seq) == 0) { // if this sequence has not been seen before, register it as a new entry
pre_cluster[a_seq].push_back(seq_id);
Expand All @@ -145,16 +148,17 @@ int main(int argc, char** argv) {

dereped_cated_dict[seq_id] = rec_id + a_seq_to_write;

each_d[seq_id.substr(0, 5)].push_back(seq_id);
each_d[seq_id.substr(0, seq_id.find("-"))].push_back(seq_id);

} else { // if presence before
(pre_cluster.at(a_seq)).push_back(seq_id);

}

// overwrite the previous seq_id and info with those parsed from the current header line
info = a_line.substr(13);
seq_id = a_line.substr(1, 11);
size_t space_indx = a_line.find(" ");
seq_id = a_line.substr(1, space_indx - 1);
info = a_line.substr(space_indx + 1);

// reset previous a_seq
a_seq = ""; // reset a_seq
Expand Down Expand Up @@ -184,7 +188,7 @@ int main(int argc, char** argv) {

dereped_cated_dict[seq_id] = rec_id + a_seq_to_write;

each_d[seq_id.substr(0, 5)].push_back(seq_id);
each_d[seq_id.substr(0, seq_id.find("-"))].push_back(seq_id);

} else {
(pre_cluster.at(a_seq)).push_back(seq_id);
Expand Down
2 changes: 1 addition & 1 deletion src/Step5_query_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ int main(int argc, char** argv) {
while (std::getline(a_blast_op, a_line)) {
a_line = a_line + "\n";

std::string query = a_line.substr(0, 11);
std::string query = a_line.substr(0, a_line.find("\t"));

size_t h = hash_fn(query);
int b = h % b_level;
Expand Down
6 changes: 3 additions & 3 deletions src/Step6_filter_n_bin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ int main(int argc, char** argv) {
key = element;
}
key_++;
bin_spe[key].insert(element.substr(0, 5));
bin_spe[key].insert(element.substr(0, element.find("-")));
}

}
Expand Down Expand Up @@ -203,7 +203,7 @@ int main(int argc, char** argv) {
std::getline(ss_, subject, '\t');
std::getline(ss_, score);

if (query.substr(0, 5) == subject.substr(0, 5)) {
if (query.substr(0, query.find("-")) == subject.substr(0, subject.find("-"))) {
continue;
} else {
int score_ = stoi(score);
Expand All @@ -227,7 +227,7 @@ int main(int argc, char** argv) {

for (const auto& [sub, its_score] : nested_um) {

std::string s_spe = sub.substr(0, 5);
std::string s_spe = sub.substr(0, sub.find("-"));

if (bin_spe_->at(qs).find(s_spe) != bin_spe_->at(qs).end()) { // if qs pre cluster has that spe
continue;
Expand Down
6 changes: 3 additions & 3 deletions src/Step9_write_clusters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,15 +207,15 @@ int main(int argc, char** argv) {

// first one
getline(dereped_cat_file, a_line);
std::string key = a_line.substr(1, 11);
std::string key = a_line.substr(1, a_line.find(" ") - 1);
std::string seq = "";

while (getline(dereped_cat_file, a_line)) {
if (a_line[0] == '>') {
// save previous one
fasta_dict[key] = seq;

key = a_line.substr(1, 11);
key = a_line.substr(1, a_line.find(" ") - 1);
seq = "";
} else {
seq = seq + a_line + "\n";
Expand Down Expand Up @@ -243,7 +243,7 @@ int main(int argc, char** argv) {
std::unordered_set<std::string> spe_count;

for (const std::string& s : the_cluster) {
spe_count.insert(s.substr(0, 5));
spe_count.insert(s.substr(0, s.find("-")));
}

fs::path file_naam;
Expand Down
2 changes: 1 addition & 1 deletion src/ThreadPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <stdexcept>
#include <string>

extern const std::string __version__ = "0.2.2";
extern const std::string __version__ = "0.2.3";

class ThreadPool {
public:
Expand Down

0 comments on commit 09d15ac

Please sign in to comment.