Skip to content

Commit

Permalink
feat: Enable printing FILTER field for vcf-to-txt (#242)
Browse files Browse the repository at this point in the history
Users can opt in to printing the FILTER field by passing the `--with-filter` flag.

Co-authored-by: Johannes Köster <[email protected]>
  • Loading branch information
essut and johanneskoester committed Jul 21, 2022
1 parent 495a843 commit 13bef65
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 2 deletions.
32 changes: 30 additions & 2 deletions src/bcf/to_txt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,19 @@ impl Writer {

const HEADER_COMMON: &[u8] = b"VARIANT";

pub fn to_txt(info_tags: &[&str], format_tags: &[&str], show_genotypes: bool) -> Result<()> {
pub fn to_txt(
info_tags: &[&str],
format_tags: &[&str],
show_genotypes: bool,
show_filter: bool,
) -> Result<()> {
let mut reader = bcf::Reader::from_stdin()?;
let mut writer = Writer::new(io::BufWriter::new(io::stdout()));

let common_n = 5 + info_tags.len();
let mut common_n = 5 + info_tags.len();
if show_filter {
common_n += 1
}
writer.write_field(HEADER_COMMON)?;
for _ in 1..common_n {
writer.write_field(HEADER_COMMON)?;
Expand All @@ -88,6 +96,9 @@ pub fn to_txt(info_tags: &[&str], format_tags: &[&str], show_genotypes: bool) ->
writer.write_field(b"REF")?;
writer.write_field(b"ALT")?;
writer.write_field(b"QUAL")?;
if show_filter {
writer.write_field(b"FILTER")?;
}
for name in info_tags {
writer.write_field(name.as_bytes())?;
}
Expand Down Expand Up @@ -124,6 +135,23 @@ pub fn to_txt(info_tags: &[&str], format_tags: &[&str], show_genotypes: bool) ->
q => writer.write_float(q)?,
}

if show_filter {
if rec.has_filter(".".as_bytes()) {
writer.write_field(b"")?
} else if rec.has_filter("PASS".as_bytes()) {
writer.write_field(b"PASS")?
} else {
let mut filters = Vec::new();
for (i, filter) in rec.filters().enumerate() {
if i != 0 {
filters.push(b';');
}
filters.extend_from_slice(&reader.header().id_to_name(filter));
}
writer.write_field(&filters)?;
}
}

for name in info_tags {
let _name = name.as_bytes();
if let Ok((tag_type, tag_length)) = rec.header().info_type(_name) {
Expand Down
4 changes: 4 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ pub(crate) enum Command {
/// Display genotypes.
#[structopt(long, short)]
genotypes: bool,

/// Include FILTER field.
#[structopt(long)]
with_filter: bool,
},

/// Annotate for each variant in a VCF/BCF at STDIN whether it is contained in a
Expand Down
2 changes: 2 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,12 @@ fn main() -> Result<()> {
info,
format,
genotypes,
with_filter,
} => bcf::to_txt::to_txt(
info.iter().map(|s| s as &str).collect_vec().as_slice(),
format.iter().map(|s| s as &str).collect_vec().as_slice(),
genotypes,
with_filter,
)?,
VcfMatch {
vcf,
Expand Down
6 changes: 6 additions & 0 deletions tests/expected/variant-table-with-filter.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
VARIANT VARIANT VARIANT VARIANT VARIANT VARIANT VARIANT VARIANT VARIANT S1 S1 S2 S2 S3 S3
CHROM POS REF ALT QUAL FILTER T X SOMATIC GT S GT S GT S
1 100 A T 2 true 1/1 a ./1 bbbbbbb . ccccccccc
1 200 A T PASS 2 true 1/1 a ./1 bbbbbbb . ccccccccc
1 300 A T IL 2 true 1/1 a ./1 bbbbbbb . ccccccccc
1 400 A T FA;IL 2 true 1/1 a ./1 bbbbbbb . ccccccccc
11 changes: 11 additions & 0 deletions tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@ fn vcf_to_txt() {
}

/// Integration test for `rbt vcf-to-txt --with-filter`.
///
/// Runs the debug build of `rbt` through `bash -c`, feeding it
/// `tests/test-with-filter.vcf` on stdin and redirecting stdout to
/// `tests/variant-table-with-filter.txt`. The test requires the shell command
/// to exit successfully, then passes the produced file and the expected file
/// to `test_output` (presumably a content comparison; see its definition).
#[test]
fn vcf_to_txt_with_filter() {
    // The redirections (`<`, `>`) are interpreted by bash, which is why the
    // whole pipeline is passed as a single `-c` argument.
    let shell_cmd = "target/debug/rbt vcf-to-txt --genotypes --fmt S --info T X SOMATIC --with-filter < tests/test-with-filter.vcf > tests/variant-table-with-filter.txt";

    let mut child = Command::new("bash")
        .args(&["-c", shell_cmd])
        .spawn()
        .unwrap();
    let exit_status = child.wait().unwrap();
    assert!(exit_status.success());

    let produced = "tests/variant-table-with-filter.txt";
    let expected = "tests/expected/variant-table-with-filter.txt";
    test_output(produced, expected);
}

// FIXME: can't work out how to use the `#[should_panic]` attribute
//#[should_panic]
fn vcf_to_txt_input_info_as_format() {
Expand Down
14 changes: 14 additions & 0 deletions tests/test-with-filter.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
##fileformat=VCFv4.3
##FILTER=<ID=PASS,Description="All filters passed">
##FILTER=<ID=FA,Description="oh">
##FILTER=<ID=IL,Description="no">
##contig=<ID=1>
##FORMAT=<ID=S,Number=1,Type=String,Description="Text">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=T,Number=A,Type=Integer,Description="Text">
##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Somatic variant">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3
1 100 a A T . . T=2;SOMATIC S:GT a:1/1 bbbbbbb:./1 ccccccccc:.
1 200 a A T . PASS T=2;SOMATIC S:GT a:1/1 bbbbbbb:./1 ccccccccc:.
1 300 a A T . IL T=2;SOMATIC S:GT a:1/1 bbbbbbb:./1 ccccccccc:.
1 400 a A T . FA;IL T=2;SOMATIC S:GT a:1/1 bbbbbbb:./1 ccccccccc:.

0 comments on commit 13bef65

Please sign in to comment.