Skip to content

Commit

Permalink
vcf/async/io/reader/header: Parse header line by line
Browse files Browse the repository at this point in the history
The async header parser can now build a `vcf::Header` by parsing a raw
header line by line. As a result, the entire raw header no longer needs to
be read into memory before parsing.
  • Loading branch information
zaeleus committed May 29, 2024
1 parent 067e8cc commit 4ed0117
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 6 deletions.
6 changes: 6 additions & 0 deletions noodles-vcf/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@

### Changed

* vcf/async/io/reader/header: Parse header line by line.

The async header parser can now build a `vcf::Header` by parsing a raw
header line by line. As a result, the entire raw header no longer needs to
be read into memory before parsing.

* vcf/io/writer/record/reference_bases: Resolve IUPAC ambiguity codes
([#268]).

Expand Down
49 changes: 43 additions & 6 deletions noodles-vcf/src/async/io/reader/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ use std::{
};

use pin_project_lite::pin_project;
use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt, ReadBuf};
use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, ReadBuf};

use crate::Header;
use crate::{header, Header};

pin_project! {
struct Reader<R> {
Expand Down Expand Up @@ -80,14 +80,51 @@ pub(super) async fn read_header<R>(reader: &mut R) -> io::Result<Header>
where
R: AsyncBufRead + Unpin,
{
let mut s = String::new();
Reader::new(reader).read_to_string(&mut s).await?;
s.parse()
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))
let mut reader = Reader::new(reader);

let mut parser = header::Parser::default();
let mut buf = Vec::new();

while read_line(&mut reader, &mut buf).await? != 0 {
parser
.parse_partial(&buf)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
}

parser
.finish()
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}

/// Reads a single line from `reader` into `dst`, dropping its line terminator.
///
/// `dst` is cleared before reading. If the bytes read end with `\n`, that byte
/// is removed, along with a `\r` directly preceding it (so both `\n` and
/// `\r\n` endings are handled). A final line with no terminator is left as is.
///
/// Returns the number of bytes consumed from `reader`, terminator included.
/// A return value of 0 means nothing was read, which callers use as the
/// end-of-input signal.
///
/// # Errors
///
/// Propagates any I/O error raised by the underlying `read_until` call.
async fn read_line<R>(reader: &mut R, dst: &mut Vec<u8>) -> io::Result<usize>
where
    R: AsyncBufRead + Unpin,
{
    const LF: u8 = b'\n';
    const CR: u8 = b'\r';

    dst.clear();

    let bytes_read = reader.read_until(LF, dst).await?;

    // When nothing was read, `dst` is still empty and neither check matches,
    // so the zero count falls straight through.
    if dst.last() == Some(&LF) {
        dst.pop();

        if dst.last() == Some(&CR) {
            dst.pop();
        }
    }

    Ok(bytes_read)
}

#[cfg(test)]
mod tests {
use tokio::io::AsyncReadExt;

use super::*;

#[tokio::test]
Expand Down

0 comments on commit 4ed0117

Please sign in to comment.