From 13bef659588d9c7cfd81eaff459234c5659a99b8 Mon Sep 17 00:00:00 2001 From: Edwin Sutanto <70097106+essut@users.noreply.github.com> Date: Thu, 21 Jul 2022 15:41:44 +0700 Subject: [PATCH] feat: Enable printing FILTER field for vcf-to-txt (#242) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users can opt in to print the FILTER field using the `--with-filter` flag Co-authored-by: Johannes Köster --- src/bcf/to_txt.rs | 32 ++++++++++++++++++-- src/cli.rs | 4 +++ src/main.rs | 2 ++ tests/expected/variant-table-with-filter.txt | 6 ++++ tests/lib.rs | 11 +++++++ tests/test-with-filter.vcf | 14 +++++++++ 6 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 tests/expected/variant-table-with-filter.txt create mode 100644 tests/test-with-filter.vcf diff --git a/src/bcf/to_txt.rs b/src/bcf/to_txt.rs index 241a4f64..59579095 100644 --- a/src/bcf/to_txt.rs +++ b/src/bcf/to_txt.rs @@ -64,11 +64,19 @@ impl Writer { const HEADER_COMMON: &[u8] = b"VARIANT"; -pub fn to_txt(info_tags: &[&str], format_tags: &[&str], show_genotypes: bool) -> Result<()> { +pub fn to_txt( + info_tags: &[&str], + format_tags: &[&str], + show_genotypes: bool, + show_filter: bool, +) -> Result<()> { let mut reader = bcf::Reader::from_stdin()?; let mut writer = Writer::new(io::BufWriter::new(io::stdout())); - let common_n = 5 + info_tags.len(); + let mut common_n = 5 + info_tags.len(); + if show_filter { + common_n += 1 + } writer.write_field(HEADER_COMMON)?; for _ in 1..common_n { writer.write_field(HEADER_COMMON)?; @@ -88,6 +96,9 @@ pub fn to_txt(info_tags: &[&str], format_tags: &[&str], show_genotypes: bool) -> writer.write_field(b"REF")?; writer.write_field(b"ALT")?; writer.write_field(b"QUAL")?; + if show_filter { + writer.write_field(b"FILTER")?; + } for name in info_tags { writer.write_field(name.as_bytes())?; } @@ -124,6 +135,23 @@ pub fn to_txt(info_tags: &[&str], format_tags: &[&str], show_genotypes: bool) -> q => writer.write_float(q)?, } 
+ if show_filter { + if rec.has_filter(".".as_bytes()) { + writer.write_field(b"")? + } else if rec.has_filter("PASS".as_bytes()) { + writer.write_field(b"PASS")? + } else { + let mut filters = Vec::new(); + for (i, filter) in rec.filters().enumerate() { + if i != 0 { + filters.push(b';'); + } + filters.extend_from_slice(&reader.header().id_to_name(filter)); + } + writer.write_field(&filters)?; + } + } + for name in info_tags { let _name = name.as_bytes(); if let Ok((tag_type, tag_length)) = rec.header().info_type(_name) { diff --git a/src/cli.rs b/src/cli.rs index 75c52532..813ed018 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -110,6 +110,10 @@ pub(crate) enum Command { /// Display genotypes. #[structopt(long, short)] genotypes: bool, + + /// Include FILTER field. + #[structopt(long)] + with_filter: bool, }, /// Annotate for each variant in a VCF/BCF at STDIN whether it is contained in a diff --git a/src/main.rs b/src/main.rs index 80131663..0dea0eb2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,10 +54,12 @@ fn main() -> Result<()> { info, format, genotypes, + with_filter, } => bcf::to_txt::to_txt( info.iter().map(|s| s as &str).collect_vec().as_slice(), format.iter().map(|s| s as &str).collect_vec().as_slice(), genotypes, + with_filter, )?, VcfMatch { vcf, diff --git a/tests/expected/variant-table-with-filter.txt b/tests/expected/variant-table-with-filter.txt new file mode 100644 index 00000000..7580c8f5 --- /dev/null +++ b/tests/expected/variant-table-with-filter.txt @@ -0,0 +1,6 @@ +VARIANT VARIANT VARIANT VARIANT VARIANT VARIANT VARIANT VARIANT VARIANT S1 S1 S2 S2 S3 S3 +CHROM POS REF ALT QUAL FILTER T X SOMATIC GT S GT S GT S +1 100 A T 2 true 1/1 a ./1 bbbbbbb . ccccccccc +1 200 A T PASS 2 true 1/1 a ./1 bbbbbbb . ccccccccc +1 300 A T IL 2 true 1/1 a ./1 bbbbbbb . ccccccccc +1 400 A T FA;IL 2 true 1/1 a ./1 bbbbbbb . 
ccccccccc
diff --git a/tests/lib.rs b/tests/lib.rs
index 6d7826ef..86cf0b91 100644
--- a/tests/lib.rs
+++ b/tests/lib.rs
@@ -109,6 +109,20 @@ fn vcf_to_txt() {
 }
 
+/// Runs `rbt vcf-to-txt` with `--with-filter` on `tests/test-with-filter.vcf` and passes the
+/// resulting table and the expected table to `test_output`.
 #[test]
+fn vcf_to_txt_with_filter() {
+    assert!(Command::new("bash")
+        .arg("-c")
+        .arg("target/debug/rbt vcf-to-txt --genotypes --fmt S --info T X SOMATIC --with-filter < tests/test-with-filter.vcf > tests/variant-table-with-filter.txt")
+        .spawn().unwrap().wait().unwrap().success());
+    test_output(
+        "tests/variant-table-with-filter.txt",
+        "tests/expected/variant-table-with-filter.txt",
+    );
+}
+
+#[test]
 // FIXME: can't work out how to use should_panic macro
 //#[should_panic]
 fn vcf_to_txt_input_info_as_format() {
diff --git a/tests/test-with-filter.vcf b/tests/test-with-filter.vcf
new file mode 100644
index 00000000..ea48c497
--- /dev/null
+++ b/tests/test-with-filter.vcf
@@ -0,0 +1,14 @@
+##fileformat=VCFv4.3
+##FILTER=
+##FILTER=
+##FILTER=
+##contig=
+##FORMAT=
+##FORMAT=
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3
+1 100 a A T . . T=2;SOMATIC S:GT a:1/1 bbbbbbb:./1 ccccccccc:.
+1 200 a A T . PASS T=2;SOMATIC S:GT a:1/1 bbbbbbb:./1 ccccccccc:.
+1 300 a A T . IL T=2;SOMATIC S:GT a:1/1 bbbbbbb:./1 ccccccccc:.
+1 400 a A T . FA;IL T=2;SOMATIC S:GT a:1/1 bbbbbbb:./1 ccccccccc:.