From 4ed01173f6d78c7edec1856e36428430506b697f Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Wed, 29 May 2024 10:19:54 -0500 Subject: [PATCH] vcf/async/io/reader/header: Parse header line by line The async header parser can now build a `vcf::Header` by parsing a raw header line by line. This makes it so that it is no longer required to read the entire raw header into memory before parsing. --- noodles-vcf/CHANGELOG.md | 6 +++ noodles-vcf/src/async/io/reader/header.rs | 49 ++++++++++++++++++++--- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/noodles-vcf/CHANGELOG.md b/noodles-vcf/CHANGELOG.md index ccd6a6176..9ee28916a 100644 --- a/noodles-vcf/CHANGELOG.md +++ b/noodles-vcf/CHANGELOG.md @@ -4,6 +4,12 @@ ### Changed + * vcf/async/io/reader/header: Parse header line by line. + + The async header parser can now build a `vcf::Header` by parsing a raw + header line by line. This makes it so that it is no longer required to read + the entire raw header into memory before parsing. + * vcf/io/writer/record/reference_bases: Resolve IUPAC ambiguity codes ([#268]). diff --git a/noodles-vcf/src/async/io/reader/header.rs b/noodles-vcf/src/async/io/reader/header.rs index c76a0fd0a..67cfdf280 100644 --- a/noodles-vcf/src/async/io/reader/header.rs +++ b/noodles-vcf/src/async/io/reader/header.rs @@ -4,9 +4,9 @@ use std::{ }; use pin_project_lite::pin_project; -use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt, ReadBuf}; +use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, ReadBuf}; -use crate::Header; +use crate::{header, Header}; pin_project! { struct Reader<R> { @@ -80,14 +80,51 @@ pub(super) async fn read_header<R>(reader: &mut R) -> io::Result<Header>
where R: AsyncBufRead + Unpin, { - let mut s = String::new(); - Reader::new(reader).read_to_string(&mut s).await?; - s.parse() - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e)) + let mut reader = Reader::new(reader); + + let mut parser = header::Parser::default(); + let mut buf = Vec::new(); + + while read_line(&mut reader, &mut buf).await? != 0 { + parser + .parse_partial(&buf) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + } + + parser + .finish() + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) +} + +async fn read_line<R>(reader: &mut R, dst: &mut Vec<u8>) -> io::Result<usize> +where + R: AsyncBufRead + Unpin, +{ + const LINE_FEED: u8 = b'\n'; + const CARRIAGE_RETURN: u8 = b'\r'; + + dst.clear(); + + match reader.read_until(LINE_FEED, dst).await? { + 0 => Ok(0), + n => { + if dst.ends_with(&[LINE_FEED]) { + dst.pop(); + + if dst.ends_with(&[CARRIAGE_RETURN]) { + dst.pop(); + } + } + + Ok(n) + } + } } #[cfg(test)] mod tests { + use tokio::io::AsyncReadExt; + use super::*; #[tokio::test]