-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Pierre Marijon
committed
Jan 10, 2020
1 parent
08271fd
commit f32900e
Showing
18 changed files
with
307 additions
and
359 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
name = "yacrd" | ||
version = "0.6.0" | ||
authors = ["Pierre Marijon <[email protected]>"] | ||
edition = '2018' | ||
|
||
exclude = ["image/*", "tests/*"] | ||
|
||
|
@@ -10,30 +11,26 @@ homepage = "https://github.com/natir/yacrd" | |
repository = "https://github.com/natir/yacrd" | ||
readme = "Readme.md" | ||
license = "MIT" | ||
keywords = ["bioinformatics", "chimera", "long-read"] | ||
keywords = ["bioinformatics", "chimera", "long-read", "scrubbing"] | ||
|
||
[badges] | ||
travis-ci = { repository = "natir/yacrd", branch = "master" } | ||
|
||
[dependencies] | ||
bio = "0.30" | ||
csv = "1" | ||
log = "0.4.0" | ||
csv = "1.1" | ||
log = "0.4" | ||
anyhow = "1.0" | ||
niffler = {git = "https://github.com/luizirber/niffler/", branch = "api_1.0"} | ||
thiserror = "1.0" | ||
structopt = "0.3" | ||
env_logger = "0.7" | ||
lazy_static = "1.0" | ||
serde_derive = "1.0" | ||
enum_primitive = "0.1.1" | ||
|
||
|
||
[dev-dependencies] | ||
tempfile = "3" | ||
tempfile = "3.1" | ||
|
||
[profile.release] | ||
debug = true # uncomment for proffiling | ||
# debug = true # uncomment for proffiling | ||
lto = 'thin' | ||
opt-level = 3 | ||
overflow-checks = false | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,45 +22,39 @@ SOFTWARE. | |
|
||
#[derive(StructOpt, Debug)] | ||
#[structopt( | ||
version = "0.6b Mew", | ||
version = "0.6.0 Flareon", | ||
author = "Pierre Marijon <[email protected]>", | ||
name = "yacrd", | ||
about = " | ||
Yacrd use overlap between reads, to detect 'good' and 'bad' region, | ||
region with coverage over threshold is 'good' other are 'bad'. | ||
If read have a 'bad' region in middle this reads is mark as 'Chimeric'. | ||
If ratio of 'bad' region length on total read length is larger than threshold this reads is mark as 'Not_covered'. | ||
a region with coverage over the threshold is 'good' others are 'bad'. | ||
If read has a 'bad' region in middle this reads is mark as 'Chimeric'. | ||
If the ratio of 'bad' region length on total read length is larger than threshold this reads is mark as 'Not_covered'. | ||
Yacrd can make some other actions: | ||
- filter: for sequence or overlap file, record with reads marked as Chimeric or Not_covered isn't write in output | ||
- extract: for sequence or overlap file, record contain reads marked as Chimeric or Not_covered is write in output | ||
- split: for sequence file bad region in middle of reads are removed, Not_covered read is removed | ||
- scrubb: for sequence file all bad region are removed, Not_covered read is removed | ||
- filter: for sequence or overlap file, record with reads marked as Chimeric or NotCovered isn't written in the output | ||
- extract: for sequence or overlap file, record contains reads marked as Chimeric or NotCovered is written in the output | ||
- split: for sequence file bad region in the middle of reads are removed, NotCovered read is removed | ||
- scrubb: for sequence file all bad region are removed, NotCovered read is removed | ||
" | ||
)] | ||
pub struct Command { | ||
#[structopt( | ||
short = "i", | ||
long = "input", | ||
required = true, | ||
help = "path to input file overlap (.paf|.m4) or yacrd report (.yacrd) format audetected input-format overide detection" | ||
help = "path to input file overlap (.paf|.m4|.mhap) or yacrd report (.yacrd), format is autodetect and compression input is allowed (gz|bzip2|lzma)" | ||
)] | ||
pub input: String, | ||
|
||
#[structopt( | ||
short = "o", | ||
long = "output", | ||
required = true, | ||
help = "path output file, yacrd format by default output-format can overide this value" | ||
help = "path output file" | ||
)] | ||
pub output: String, | ||
|
||
#[structopt(long = "input-format", possible_values = &["paf", "m4", "yacrd", "json"], help = "set the input-format")] | ||
pub input_format: Option<String>, | ||
|
||
#[structopt(long = "output-format", possible_values = &["yacrd", "json"], default_value = "yacrd", help = "set the output-format")] | ||
pub output_format: String, | ||
|
||
#[structopt( | ||
short = "c", | ||
long = "coverage", | ||
|
@@ -73,21 +67,21 @@ pub struct Command { | |
short = "n", | ||
long = "not-coverage", | ||
default_value = "0.8", | ||
help = "if ratio of bad region length on total lengh is lower that this value, all read is mark as bad" | ||
help = "if the ratio of bad region length on total length is lower than this value, read is marked as NotCovered" | ||
)] | ||
pub not_coverage: f64, | ||
|
||
#[structopt( | ||
short = "d", | ||
long = "ondisk", | ||
help = "if it set yacrd create tempory file, with value of this parameter as prefix, to reduce memory usage but increase the runtime, warning if prefix contain path separator (`/` for unix or `\\` for windows) directory is delete" | ||
help = "yacrd switches to 'ondisk' mode which will reduce memory usage but increase computation time. The value passed as a parameter is used as a prefix for the temporary files created by yacrd. Be careful if the prefix contains path separators (`/` for unix or `\\` for windows) this folder will be deleted" | ||
)] | ||
pub ondisk: Option<String>, | ||
|
||
#[structopt( | ||
long = "ondisk-buffer-size", | ||
default_value = "64000000", | ||
help = "with the default value yacrd in ondisk mode use around 800 MBytes, you can increase to reduce runtime but increase memory usage" | ||
help = "with the default value yacrd in 'ondisk' mode use around 1 GBytes, you can increase to reduce runtime but increase memory usage" | ||
)] | ||
pub ondisk_buffer_size: String, | ||
|
||
|
@@ -99,11 +93,11 @@ pub struct Command { | |
pub enum SubCommand { | ||
#[structopt(about = "All bad region of read is removed")] | ||
Scrubb(Scrubb), | ||
#[structopt(about = "Record mark as chimeric or Not_covered is filter")] | ||
#[structopt(about = "Record mark as chimeric or NotCovered is filter")] | ||
Filter(Filter), | ||
#[structopt(about = "Record mark as chimeric or Not_covered is extract")] | ||
#[structopt(about = "Record mark as chimeric or NotCovered is extract")] | ||
Extract(Extract), | ||
#[structopt(about = "Record mark as chimeric or Not_covered is split")] | ||
#[structopt(about = "Record mark as chimeric or NotCovered is split")] | ||
Split(Split), | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.