diff --git a/CHANGELOG.md b/CHANGELOG.md index c829b4bc9..4c840b462 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **AAC/ADTS**: Fix frame header search ([issue](https://github.com/Serial-ATA/lofty-rs/issues/584)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/586)) - When searching for the next frame, the parser was not fully skipping the previous one. If the AAC payload contained the frame sync bits and an otherwise invalid ADTS header, then the parser would error. +- **FLAC**: Fix corruption of files with no metadata blocks ([issue](https://github.com/Serial-ATA/lofty-rs/issues/549)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/583)) ### Removed diff --git a/lofty/src/flac/block.rs b/lofty/src/flac/block.rs index ee2a596ad..fb2dcfc40 100644 --- a/lofty/src/flac/block.rs +++ b/lofty/src/flac/block.rs @@ -1,11 +1,12 @@ #![allow(dead_code)] use crate::error::Result; -use crate::macros::try_vec; +use crate::macros::{err, try_vec}; +use crate::picture::{Picture, PictureInformation}; -use std::io::{Read, Seek, SeekFrom}; +use std::io::{Cursor, Read, Seek, SeekFrom, Write}; -use byteorder::{BigEndian, ReadBytesExt}; +use byteorder::{BigEndian, LittleEndian, ReadBytesExt, WriteBytesExt}; pub(in crate::flac) const BLOCK_ID_STREAMINFO: u8 = 0; pub(in crate::flac) const BLOCK_ID_PADDING: u8 = 1; @@ -16,7 +17,6 @@ pub(in crate::flac) const BLOCK_ID_PICTURE: u8 = 6; const BLOCK_HEADER_SIZE: u64 = 4; pub(crate) struct Block { - pub(super) byte: u8, pub(super) ty: u8, pub(super) last: bool, pub(crate) content: Vec, @@ -25,6 +25,9 @@ pub(crate) struct Block { } impl Block { + pub(super) const BLOCK_HEADER_SIZE: usize = 4; + pub(super) const MAX_CONTENT_SIZE: u32 = 16_777_215; + pub(crate) fn read(data: &mut R, mut predicate: P) -> Result where R: Read + Seek, @@ -51,7 +54,6 @@ impl Block { let end = start + u64::from(size) + BLOCK_HEADER_SIZE; Ok(Self { - byte, ty, last, content, @@ -59,4 +61,81 @@ impl Block { end, }) } + + pub fn len(&self) -> u32 { + (Self::BLOCK_HEADER_SIZE as u32) + self.content.len() as u32 + } + + pub(super) fn new_padding(size: usize) -> Result { + let block_size = core::cmp::min(size, Self::MAX_CONTENT_SIZE as usize); + let content = try_vec![0; block_size]; + Ok(Self { + ty: BLOCK_ID_PADDING, + last: false, + content, + start: 0, + end: 0, + }) + } + + pub(super) fn new_picture(picture: &Picture, info: PictureInformation) -> Result { + let picture_data = picture.as_flac_bytes(info, false); + if picture_data.len() > Self::MAX_CONTENT_SIZE as usize { + err!(TooMuchData); + } + + Ok(Self { + ty: BLOCK_ID_PICTURE, + last: false, + content: picture_data, + start: 0, + end: 0, + }) + } + + pub(super) fn new_comments<'a>( + vendor: &str, + items: &mut impl Iterator, + ) -> Result { + let mut comments = Cursor::new(Vec::new()); + + comments.write_u32::(vendor.len() as u32)?; + comments.write_all(vendor.as_bytes())?; + + let item_count_pos = comments.stream_position()?; + let mut count = 0; + + comments.write_u32::(count)?; + + crate::ogg::write::create_comments(&mut comments, &mut count, items)?; + + if comments.get_ref().len() > Block::MAX_CONTENT_SIZE as usize { + err!(TooMuchData); + } + + comments.seek(SeekFrom::Start(item_count_pos))?; + comments.write_u32::(count)?; + + Ok(Self { + ty: BLOCK_ID_VORBIS_COMMENTS, + last: false, + content: comments.into_inner(), + start: 0, + end: 0, + }) + } + + pub(super) fn write_to(&self, writer: &mut W) -> Result + where + W: Write, + { + let block_content_size = + core::cmp::min(self.content.len(), Self::MAX_CONTENT_SIZE as usize); + + writer.write_u8((self.ty & 0x7F) | u8::from(self.last) << 7)?; + writer.write_u24::(block_content_size as u32)?; + writer.write_all(&self.content)?; + + Ok(Self::BLOCK_HEADER_SIZE + self.content.len()) + } } diff --git a/lofty/src/flac/write.rs b/lofty/src/flac/write.rs index 74bab8c27..75bb366d6 100644 --- a/lofty/src/flac/write.rs +++ b/lofty/src/flac/write.rs @@ -4,18 +4,15 @@ use crate::config::WriteOptions; use crate::error::{LoftyError, Result}; use crate::macros::{err, try_vec}; use crate::ogg::tag::VorbisCommentsRef; -use crate::ogg::write::create_comments; use crate::picture::{Picture, PictureInformation}; use crate::tag::{Tag, TagType}; use crate::util::io::{FileLike, Length, Truncate}; use std::borrow::Cow; -use std::io::{Cursor, Read, Seek, SeekFrom, Write}; +use std::io::{Cursor, Read, Seek, SeekFrom}; +use std::iter::Peekable; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; - -const BLOCK_HEADER_SIZE: usize = 4; -const MAX_BLOCK_SIZE: u32 = 16_777_215; +use byteorder::{LittleEndian, ReadBytesExt}; pub(crate) fn write_to(file: &mut F, tag: &Tag, write_options: WriteOptions) -> Result<()> where @@ -43,6 +40,26 @@ where } } +/// The location of the last metadata block +/// +/// Depending on the order of the metadata blocks, the metadata present for writing, and the [`WriteOptions`], the +/// writer will have to determine when to set the `Last-metadata-block` flag. This is because the writer +/// doesn't fully parse the file, it only writes what has changed. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum LastBlock { + /// The last block in the current file is the stream info + StreamInfo, + /// The last block will be the newly written Vorbis Comments + Comments, + /// The last block will be the final newly written picture + Picture, + /// The last block will be the newly written padding block, if the user allows for padding (most common case) + Padding, + /// The last block already exists in the stream and will not be touched while writing, so nothing + /// to do + Other, +} + pub(crate) fn write_to_inner<'a, F, II, IP>( file: &mut F, tag: &mut VorbisCommentsRef<'a, II, IP>, @@ -54,195 +71,244 @@ where II: Iterator, IP: Iterator, { - let stream_info = verify_flac(file)?; - - let mut last_block = stream_info.last; - let mut file_bytes = Vec::new(); file.read_to_end(&mut file_bytes)?; let mut cursor = Cursor::new(file_bytes); - // TODO: We need to actually use padding (https://github.com/Serial-ATA/lofty-rs/issues/445) - let mut end_padding_exists = false; - let mut last_block_info = ( - stream_info.byte, - stream_info.start as usize, - stream_info.end as usize, - ); + let stream_info = verify_flac(&mut cursor)?; + let stream_info_start = stream_info.start as usize; + let stream_info_end = stream_info.end as usize; - let mut blocks_to_remove = Vec::new(); + let BlockCollector { + vendor, + mut blocks, + mut last_block_location, + mut last_block_replaced, + } = BlockCollector::collect(&mut cursor, stream_info)?; - while !last_block { - let block = Block::read(&mut cursor, |block_ty| block_ty == BLOCK_ID_VORBIS_COMMENTS)?; - let start = block.start; - let end = block.end; + if let Some(vendor) = vendor { + tag.vendor = vendor; + } - let block_type = block.ty; - last_block = block.last; + let mut comments_peek = (&mut tag.items).peekable(); + let mut pictures_peek = (&mut tag.pictures).peekable(); - if last_block { - last_block_info = (block.byte, (end - start) as usize, end as usize) - } + let has_comments = comments_peek.peek().is_some(); + let has_pictures = pictures_peek.peek().is_some(); - match block_type { - BLOCK_ID_VORBIS_COMMENTS => { - blocks_to_remove.push((start, end)); - - // Retain the original vendor string - let reader = &mut &block.content[..]; - - let vendor_len = reader.read_u32::()?; - let mut vendor = try_vec![0; vendor_len as usize]; - reader.read_exact(&mut vendor)?; - - // TODO: Error on strict? - let Ok(vendor_str) = String::from_utf8(vendor) else { - log::warn!("FLAC vendor string is not valid UTF-8, not re-using"); - tag.vendor = Cow::Borrowed(""); - continue; - }; - - tag.vendor = Cow::Owned(vendor_str); - }, - BLOCK_ID_PICTURE => blocks_to_remove.push((start, end)), - BLOCK_ID_PADDING => { - if last_block { - end_padding_exists = true - } else { - blocks_to_remove.push((start, end)) - } - }, - _ => {}, - } + // Attempting to strip an already empty file + let has_blocks_to_remove = blocks.iter().any(|b| b.for_removal); + if !has_blocks_to_remove && !has_comments && !has_pictures { + log::debug!("Nothing to do"); + return Ok(()); } + // TODO: We need to actually use padding (https://github.com/Serial-ATA/lofty-rs/issues/445) + let will_write_padding = + last_block_location != LastBlock::Padding && write_options.preferred_padding.is_some(); let mut file_bytes = cursor.into_inner(); - if !end_padding_exists { + if last_block_location == LastBlock::StreamInfo { + if has_comments || has_pictures { + // Stream info is *always* the first block, so it'll always be replaced if we add more blocks + last_block_replaced = true; + + if has_pictures { + last_block_location = LastBlock::Picture; + } else { + last_block_location = LastBlock::Comments; + } + } else if !will_write_padding { + // Otherwise, we set the `Last-metadata-block` flag + file_bytes[stream_info_start] |= 0x80; + } + } + + let last_metadata_block = &blocks + .last() + .expect("should always have at least one block") + .block; + if will_write_padding { if let Some(preferred_padding) = write_options.preferred_padding { log::warn!("File is missing a PADDING block. Adding one"); - let mut first_byte = 0_u8; - first_byte |= last_block_info.0 & 0x7F; - - file_bytes[last_block_info.1] = first_byte; - - let block_size = core::cmp::min(preferred_padding, MAX_BLOCK_SIZE); - let mut padding_block = try_vec![0; BLOCK_HEADER_SIZE + block_size as usize]; + // `PADDING` always goes last + last_block_replaced = true; - let mut padding_byte = 0; - padding_byte |= 0x80; - padding_byte |= 1 & 0x7F; + let mut padding_block = Block::new_padding(preferred_padding as usize)?; + padding_block.last = true; - padding_block[0] = padding_byte; - padding_block[1..4].copy_from_slice(&block_size.to_be_bytes()[1..]); + let mut encoded_padding_block = Vec::with_capacity(padding_block.len() as usize); + padding_block.write_to(&mut encoded_padding_block)?; - file_bytes.splice(last_block_info.2..last_block_info.2, padding_block); + let metadata_end = last_metadata_block.end as usize; + file_bytes.splice(metadata_end..metadata_end, encoded_padding_block); } } - let mut comment_blocks = Cursor::new(Vec::new()); - - create_comment_block(&mut comment_blocks, &tag.vendor, &mut tag.items)?; - - let mut comment_blocks = comment_blocks.into_inner(); - - create_picture_blocks(&mut comment_blocks, &mut tag.pictures)?; - - if blocks_to_remove.is_empty() { - file_bytes.splice(0..0, comment_blocks); - } else { - blocks_to_remove.sort_unstable(); - blocks_to_remove.reverse(); - - let first = blocks_to_remove.pop().unwrap(); // Infallible + // For example, if the file's previous last block was `STREAMINFO`, and we wrote a `VORBIS_COMMENT` block, + // then we unset the `Last-metadata-block` flag on the `STREAMINFO`. + if last_block_replaced { + file_bytes[last_metadata_block.start as usize] = last_metadata_block.ty; + } - for (s, e) in &blocks_to_remove { - file_bytes.drain(*s as usize..*e as usize); + let metadata_blocks = encode_tag( + &tag.vendor, + comments_peek, + pictures_peek, + last_block_location, + )?; + + blocks.reverse(); + for block in blocks { + if !block.for_removal { + continue; } - file_bytes.splice(first.0 as usize..first.1 as usize, comment_blocks); + file_bytes.drain(block.block.start as usize..block.block.end as usize); } - file.seek(SeekFrom::Start(stream_info.end))?; - file.truncate(stream_info.end)?; + file_bytes.splice(stream_info_end..stream_info_end, metadata_blocks); + + file.seek(SeekFrom::Start(0))?; file.write_all(&file_bytes)?; Ok(()) } -fn create_comment_block( - writer: &mut Cursor>, +fn encode_tag<'a, II, IP>( vendor: &str, - items: &mut dyn Iterator, -) -> Result<()> { - let mut peek = items.peekable(); - - if peek.peek().is_some() { - let mut byte = 0_u8; - byte |= 4 & 0x7F; + mut comments_peek: Peekable<&mut II>, + mut pictures_peek: Peekable<&mut IP>, + last_block_location: LastBlock, +) -> Result> +where + II: Iterator, + IP: Iterator, +{ + let mut metadata_blocks = Cursor::new(Vec::new()); - writer.write_u8(byte)?; - writer.write_u32::(vendor.len() as u32)?; - writer.write_all(vendor.as_bytes())?; + if comments_peek.peek().is_some() { + let mut block = Block::new_comments(vendor, &mut comments_peek)?; + if last_block_location == LastBlock::Comments { + block.last = true; + } - let item_count_pos = writer.stream_position()?; - let mut count = 0; + let block_size = block.write_to(&mut metadata_blocks)?; - writer.write_u32::(count)?; + log::trace!("Wrote a comment block, size: {block_size}",); + } - create_comments(writer, &mut count, &mut peek)?; + loop { + let Some((picture, info)) = pictures_peek.next() else { + break; + }; - let len = (writer.get_ref().len() - 1) as u32; + let is_last_picture = pictures_peek.peek().is_none(); - if len > MAX_BLOCK_SIZE { - err!(TooMuchData); + let mut block = Block::new_picture(picture, info)?; + if is_last_picture && last_block_location == LastBlock::Picture { + block.last = true; } - let comment_end = writer.stream_position()?; - - writer.seek(SeekFrom::Start(item_count_pos))?; - writer.write_u32::(count)?; - - writer.seek(SeekFrom::Start(comment_end))?; - writer - .get_mut() - .splice(1..1, len.to_be_bytes()[1..].to_vec()); - - // size = block type + vendor length + vendor + item count + items - log::trace!( - "Wrote a comment block, size: {}", - 1 + 4 + vendor.len() + 4 + (len as usize) - ); + let block_size = block.write_to(&mut metadata_blocks)?; + log::trace!("Wrote a picture block, size: {block_size}"); } - Ok(()) + Ok(metadata_blocks.into_inner()) } -fn create_picture_blocks( - writer: &mut Vec, - pictures: &mut dyn Iterator, -) -> Result<()> { - let mut byte = 0_u8; - byte |= 6 & 0x7F; - - for (pic, info) in pictures { - writer.write_u8(byte)?; +struct CollectedBlock { + for_removal: bool, + block: Block, +} - let pic_bytes = pic.as_flac_bytes(info, false); - let pic_len = pic_bytes.len() as u32; +struct BlockCollector { + vendor: Option>, + blocks: Vec, + last_block_location: LastBlock, + /// The *current* last block in the file will be replaced by whatever we end up writing, so we + /// need to set the `Last-metadata-block` flag on it + last_block_replaced: bool, +} - if pic_len > MAX_BLOCK_SIZE { - err!(TooMuchData); +impl BlockCollector { + /// Collect all the blocks in the file and mark which ones will be removed by this write + fn collect(reader: &mut R, stream_info: Block) -> Result + where + R: Read + Seek, + { + // Vendor string from the file, if it exists + let mut vendor = None; + + let mut last_block_location = LastBlock::StreamInfo; + let mut last_block_replaced = false; + + let mut is_last_block = stream_info.last; + let mut blocks = Vec::new(); + blocks.push(CollectedBlock { + for_removal: false, + block: stream_info, + }); + + while !is_last_block { + let block = Block::read(reader, |block_ty| block_ty == BLOCK_ID_VORBIS_COMMENTS)?; + is_last_block = block.last; + + let mut for_removal = false; + match block.ty { + BLOCK_ID_VORBIS_COMMENTS => { + for_removal = true; + + // Retain the original vendor string + let reader = &mut &block.content[..]; + + let vendor_len = reader.read_u32::()?; + let mut vendor_raw = try_vec![0; vendor_len as usize]; + reader.read_exact(&mut vendor_raw)?; + + match String::from_utf8(vendor_raw) { + Ok(vendor_str) => vendor = Some(Cow::Owned(vendor_str)), + // TODO: Error on strict? + Err(_) => { + log::warn!("FLAC vendor string is not valid UTF-8, not re-using"); + vendor = Some(Cow::Borrowed("")); + }, + } + + if is_last_block { + last_block_replaced = true; + } + }, + BLOCK_ID_PICTURE => { + for_removal = true; + if is_last_block { + last_block_replaced = true; + } + }, + BLOCK_ID_PADDING => { + if is_last_block { + last_block_location = LastBlock::Padding; + } else { + for_removal = true; + } + }, + _ => { + if is_last_block { + last_block_location = LastBlock::Other; + } + }, + } + + blocks.push(CollectedBlock { for_removal, block }); } - writer.write_all(&pic_len.to_be_bytes()[1..])?; - writer.write_all(pic_bytes.as_slice())?; - - // size = block type + block length + data - log::trace!("Wrote a picture block, size: {}", 1 + 3 + pic_len); + Ok(Self { + vendor, + blocks, + last_block_location, + last_block_replaced, + }) } - - Ok(()) } diff --git a/lofty/tests/files/assets/application_block_last.flac b/lofty/tests/files/assets/application_block_last.flac new file mode 100644 index 000000000..04048aedb Binary files /dev/null and b/lofty/tests/files/assets/application_block_last.flac differ diff --git a/lofty/tests/files/assets/stream_info_last.flac b/lofty/tests/files/assets/stream_info_last.flac new file mode 100644 index 000000000..6d65a6498 Binary files /dev/null and b/lofty/tests/files/assets/stream_info_last.flac differ diff --git a/lofty/tests/files/flac.rs b/lofty/tests/files/flac.rs index 1e8674349..520d62769 100644 --- a/lofty/tests/files/flac.rs +++ b/lofty/tests/files/flac.rs @@ -5,7 +5,8 @@ use std::io::Seek; use lofty::config::{ParseOptions, ParsingMode, WriteOptions}; use lofty::flac::FlacFile; -use lofty::ogg::VorbisComments; +use lofty::ogg::{OggPictureStorage, VorbisComments}; +use lofty::picture::{Picture, PictureInformation, PictureType}; use lofty::prelude::*; #[test_log::test] @@ -66,3 +67,117 @@ fn retain_vendor_string() { // The vendor string should be retained assert_eq!(f.vorbis_comments().unwrap().vendor(), "Lavf58.76.100"); } + +// The final written metadata block will be vorbis comments +#[test_log::test] +fn stream_info_is_last() { + // The file only has a stream info metadata block + let mut file = temp_file("tests/files/assets/stream_info_last.flac"); + + let f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); + file.rewind().unwrap(); + + assert!(f.vorbis_comments().is_none()); + + let mut tag = VorbisComments::new(); + tag.set_artist(String::from("Foo Artist")); + tag.save_to(&mut file, WriteOptions::new()).unwrap(); + + file.rewind().unwrap(); + let f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); + + assert_eq!( + f.vorbis_comments().unwrap().artist().as_deref(), + Some("Foo Artist") + ); +} + +// Same as previous test, except the new tag will be written with no padding +#[test_log::test] +fn stream_info_is_last_no_padding() { + let mut file = temp_file("tests/files/assets/stream_info_last.flac"); + + let f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); + file.rewind().unwrap(); + + assert!(f.vorbis_comments().is_none()); + + let mut tag = VorbisComments::new(); + tag.set_artist(String::from("Foo Artist")); + tag.save_to(&mut file, WriteOptions::new().preferred_padding(0)) + .unwrap(); + + file.rewind().unwrap(); + let f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); + + assert_eq!( + f.vorbis_comments().unwrap().artist().as_deref(), + Some("Foo Artist") + ); + + // Stripping the Vorbis Comments should set the `STREAMINFO` block as the last metadata block + file.rewind().unwrap(); + let tag = VorbisComments::new(); + tag.save_to(&mut file, WriteOptions::new().preferred_padding(0)) + .unwrap(); + + file.rewind().unwrap(); + let f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); + + assert!(f.vorbis_comments().is_none()); +} + +// The final written metadata block will be a picture +#[test_log::test] +fn picture_is_last() { + let mut file = temp_file("tests/files/assets/stream_info_last.flac"); + + let mut f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); + file.rewind().unwrap(); + + assert!(f.vorbis_comments().is_none()); + assert!(f.pictures().is_empty()); + + f.insert_picture( + Picture::unchecked(Vec::new()) + .pic_type(PictureType::CoverFront) + .build(), + Some(PictureInformation { + width: 200, + height: 200, + color_depth: 0, + num_colors: 16, + }), + ) + .unwrap(); + f.save_to(&mut file, WriteOptions::new().preferred_padding(0)) + .unwrap(); + + file.rewind().unwrap(); + let f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); + + assert_eq!(f.pictures().len(), 1); +} + +#[test_log::test] +fn application_block_is_last() { + let mut file = temp_file("tests/files/assets/application_block_last.flac"); + + let f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); + file.rewind().unwrap(); + + assert!(f.vorbis_comments().is_none()); + + let mut tag = VorbisComments::new(); + tag.set_artist(String::from("Foo Artist")); + tag.save_to(&mut file, WriteOptions::new().preferred_padding(0)) + .unwrap(); + + file.rewind().unwrap(); + let f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); + + assert_eq!( + f.vorbis_comments().unwrap().artist().as_deref(), + Some("Foo Artist") + ); +} diff --git a/lofty/tests/taglib/test_flac.rs b/lofty/tests/taglib/test_flac.rs index 14f3f59d6..b8661d34e 100644 --- a/lofty/tests/taglib/test_flac.rs +++ b/lofty/tests/taglib/test_flac.rs @@ -569,7 +569,10 @@ fn test_empty_seek_table() { file.rewind().unwrap(); { let f = FlacFile::read_from(&mut file, ParseOptions::new()).unwrap(); - file.seek(SeekFrom::Start(42)).unwrap(); + + // NOTE: TagLib has this at offset 42. This is because we always shift any blocks we write + // to be immediately after `STREAMINFO`, whereas TagLib will append them to the end. + file.seek(SeekFrom::Start(113)).unwrap(); assert!(f.vorbis_comments().is_some()); let mut data = [0; 4];