From 1e6a11a267191fbd767a36a62152498570f0dc6b Mon Sep 17 00:00:00 2001 From: Austin Schey Date: Sat, 14 Feb 2026 22:51:28 -0800 Subject: [PATCH] fix: fallback when BOM is not present in UTF-16 data Co-authored-by: Serial <69764315+serial-ata@users.noreply.github.com> --- CHANGELOG.md | 6 +++++ lofty/src/id3/v2/items/language_frame.rs | 31 ++++++++++++++---------- lofty/src/id3/v2/items/sync_text.rs | 30 ++++++++++++++--------- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08cc49d16..1605ceb61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- **ID3v2**: Don't error on empty UTF-16 descriptions ([issue](https://github.com/Serial-ATA/lofty-rs/issues/613)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/614)) + ## [0.23.2] - 2026-02-14 +### Fixed + - **FLAC**: - Fix duplicate `Last-metadata-block` flags in the presence of PADDING blocks ([issue](https://github.com/Serial-ATA/lofty-rs/issues/607)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/609)) - Ignore ID3v2 tags when not stripped during write ([issue](https://github.com/Serial-ATA/lofty-rs/issues/608)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/609)) diff --git a/lofty/src/id3/v2/items/language_frame.rs b/lofty/src/id3/v2/items/language_frame.rs index 76e468f96..65d21e7ad 100644 --- a/lofty/src/id3/v2/items/language_frame.rs +++ b/lofty/src/id3/v2/items/language_frame.rs @@ -3,7 +3,6 @@ use crate::error::{Id3v2Error, Id3v2ErrorKind, Result}; use crate::id3::v2::frame::content::verify_encoding; use crate::id3::v2::header::Id3v2Version; use crate::id3::v2::{FrameFlags, FrameHeader, FrameId}; -use crate::macros::err; use crate::tag::items::Lang; use crate::util::text::{ DecodeTextResult, TextDecodeOptions, TextEncoding, decode_text, @@ -49,20 +48,26 @@ impl LanguageFrame { 
TextDecodeOptions::new().encoding(encoding).terminated(true), )?; - let mut endianness: fn([u8; 2]) -> u16 = u16::from_le_bytes; - - // It's possible for the description to be the only string with a BOM - // To be safe, we change the encoding to the concrete variant determined from the description - if encoding == TextEncoding::UTF16 { - endianness = match bom { - [0xFF, 0xFE] => u16::from_le_bytes, - [0xFE, 0xFF] => u16::from_be_bytes, - _ => err!(TextDecode("UTF-16 string missing a BOM")), - }; - } + // There are 3 possibilities for UTF-16 encoded frames: + // + // * The description is the only string with a BOM + // * The description is empty (has no BOM) + // * All strings have a BOM + // + // To be safe, we change the encoding to the concrete variant determined from the description. + // Otherwise, we just have to hope that the other fields are encoded properly. + let endianness: Option<fn([u8; 2]) -> u16> = if encoding == TextEncoding::UTF16 { + match bom { + [0xFF, 0xFE] => Some(u16::from_le_bytes), + [0xFE, 0xFF] => Some(u16::from_be_bytes), + _ => None, + } + } else { + None + }; let content; - if encoding == TextEncoding::UTF16 { + if let Some(endianness) = endianness { (content, _) = utf16_decode_terminated_maybe_bom(reader, endianness)?; } else { content = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content; diff --git a/lofty/src/id3/v2/items/sync_text.rs b/lofty/src/id3/v2/items/sync_text.rs index 7ff01edd9..9404dcc9b 100644 --- a/lofty/src/id3/v2/items/sync_text.rs +++ b/lofty/src/id3/v2/items/sync_text.rs @@ -159,17 +159,23 @@ impl SynchronizedTextFrame<'_> { ) .map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?; - let mut endianness: fn([u8; 2]) -> u16 = u16::from_le_bytes; - - // It's possible for the description to be the only string with a BOM - // To be safe, we change the encoding to the concrete variant determined from the description - if encoding == TextEncoding::UTF16 { - endianness = match bom { - [0xFF, 0xFE] => 
u16::from_le_bytes, - [0xFE, 0xFF] => u16::from_be_bytes, - _ => err!(TextDecode("UTF-16 string missing a BOM")), - }; - } + // There are 3 possibilities for UTF-16 encoded frames: + // + // * The description is the only string with a BOM + // * The description is empty (has no BOM) + // * All strings have a BOM + // + // To be safe, we change the encoding to the concrete variant determined from the description. + // Otherwise, we just have to hope that the other fields are encoded properly. + let endianness: Option<fn([u8; 2]) -> u16> = if encoding == TextEncoding::UTF16 { + match bom { + [0xFF, 0xFE] => Some(u16::from_le_bytes), + [0xFE, 0xFF] => Some(u16::from_be_bytes), + _ => None, + } + } else { + None + }; let mut pos = 0; let total = (data.len() - 6) as u64 - cursor.stream_position()?; @@ -178,7 +184,7 @@ impl SynchronizedTextFrame<'_> { while pos < total { let text; - if encoding == TextEncoding::UTF16 { + if let Some(endianness) = endianness { let (decoded, bytes_read) = utf16_decode_terminated_maybe_bom(&mut cursor, endianness) .map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;