diff --git a/CHANGELOG.md b/CHANGELOG.md index 03c3d44..c3195ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `MagnetLink` now refuses to parse strings that contain a newline (`\n`), producing a `MagnetLinkError::InvalidURINewLine` error +- `MagnetLink::from_url`, `PeerSource::from_url`, and `Tracker::from_url` now take a + `fluent_uri::Uri` instead of a `url::Url` +- all error types with an `InvalidURI` or `InvalidURL` variant now have `fluent_uri::ParseError` + as their source instead of `url::ParseError` + +### Added + +- `MagnetLink` implements `Display`, so it can be converted to a string again + using `MagnetLink::to_string`. +- `MagnetLink::unsafe_parse_query` allows iterating over the raw key/value pairs of a + magnet link query, without any magnet-specific validation +- Added new `MagnetLinkError` variants to be more precise about what's wrong with + a parsed magnet link. ## Version 0.3.2 (2025-08-29) diff --git a/Cargo.toml b/Cargo.toml index 5c67879..fcce204 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ sha1 = "0.10" sha256 = "1.5" rustc-hex = "2.1" serde = { version = "1", features = [ "derive" ] } -url = "2.5" +fluent-uri = { git = "https://github.com/yescallop/fluent-uri-rs", rev = "5ad3b65" } [dev-dependencies] serde_json = "1" diff --git a/src/magnet.rs b/src/magnet.rs index 035ec25..f6c920d 100644 --- a/src/magnet.rs +++ b/src/magnet.rs @@ -1,12 +1,23 @@ -use url::Url; +use fluent_uri::pct_enc::{encoder::Query, EStr}; +use fluent_uri::{ParseError as UriParseError, Uri}; use crate::{InfoHash, InfoHashError, TorrentID}; +use std::string::FromUtf8Error; + /// Error occurred during parsing a [`MagnetLink`](crate::magnet::MagnetLink). #[derive(Clone, Debug, PartialEq)] pub enum MagnetLinkError { - /// The URI was not valid according to [`Url::parse`](url::Url::parse). 
- InvalidURI { source: url::ParseError }, + /// The URI was not valid according to [`Uri::parse`](fluent_uri::Uri::parse). + InvalidURI { source: UriParseError }, + /// The URI does not contain a query. + InvalidURINoQuery, + /// The URI query contains non-UTF8 chars + InvalidURIQueryUnicode { source: FromUtf8Error }, + /// The URI query contains a key without a value + InvalidURIQueryEmptyValue { key: String }, + /// The URI query contains a non-urlencoded `?` beyond the query declaration + InvalidURIQueryInterrogation, /// The URI contains a newline InvalidURINewLine, /// The URI scheme was not `magnet` @@ -19,6 +30,8 @@ pub enum MagnetLinkError { InvalidHash { source: InfoHashError }, /// Too many hashes were found in the magnet URI, expected two at most. TooManyHashes { number: usize }, + /// There were two or more `dn` declarations in the magnet query. + DuplicateName, /// No name was contained in the magnet URI. This is technically allowed by /// some implementations, but should not be encouraged/supported. #[cfg(feature = "magnet_force_name")] @@ -31,6 +44,18 @@ impl std::fmt::Display for MagnetLinkError { MagnetLinkError::InvalidURI { source } => { write!(f, "Invalid URI: {source}") } + MagnetLinkError::InvalidURINoQuery => { + write!(f, "Invalid URI: no query string") + } + MagnetLinkError::InvalidURIQueryEmptyValue { key } => { + write!(f, "Invalid URI: query has key {key} with no value") + } + MagnetLinkError::InvalidURIQueryUnicode { .. 
} => { + write!(f, "Invalid URI: the query part contains non-utf8 chars") + } + MagnetLinkError::InvalidURIQueryInterrogation => { + write!(f, "Invalid URI: the query part should only contain one `?`") + } MagnetLinkError::InvalidURINewLine => { write!(f, "Invalid URI: newlines are not allowed in magnet links") } @@ -46,6 +71,12 @@ impl std::fmt::Display for MagnetLinkError { MagnetLinkError::TooManyHashes { number } => { write!(f, "Too many hashes ({number})") } + MagnetLinkError::DuplicateName => { + write!( + f, + "Too many name declarations for the magnet, only expecting one." + ) + } #[cfg(feature = "magnet_force_name")] MagnetLinkError::NoNameFound => { write!(f, "No name found") @@ -60,9 +91,15 @@ impl From for MagnetLinkError { } } -impl From for MagnetLinkError { - fn from(e: url::ParseError) -> MagnetLinkError { - MagnetLinkError::InvalidURI { source: e } +impl From<(UriParseError, Input)> for MagnetLinkError { + fn from(e: (UriParseError, Input)) -> MagnetLinkError { + MagnetLinkError::InvalidURI { source: e.0 } + } +} + +impl From for MagnetLinkError { + fn from(e: FromUtf8Error) -> MagnetLinkError { + MagnetLinkError::InvalidURIQueryUnicode { source: e } } } @@ -71,6 +108,7 @@ impl std::error::Error for MagnetLinkError { match self { MagnetLinkError::InvalidURI { source } => Some(source), MagnetLinkError::InvalidHash { source } => Some(source), + // MagnetLinkError::InvalidURIQueryUnicode { source } => Some(source), _ => None, } } @@ -80,11 +118,19 @@ impl std::error::Error for MagnetLinkError { /// /// The MagnetLink can provide information about the torrent /// [`name`](crate::magnet::MagnetLink::name) and [`hash`](crate::magnet::MagnetLink::hash). -/// Other fields can be contained in the magnet URI, as explained [on Wikipedia](https://en.wikipedia.org/wiki/Magnet_URI_scheme). However, -/// they are currently not exposed by this library. 
+/// +/// More information is specified in [BEP-0009](https://bittorrent.org/beps/bep_0009.html), and +/// even more appears in the wild, as explained [on Wikipedia](https://en.wikipedia.org/wiki/Magnet_URI_scheme). #[derive(Clone, Debug)] pub struct MagnetLink { + /// Only mandatory field for magnet link parsing, unless the + /// `magnet_force_name` crate feature is enabled. hash: InfoHash, + /// Original query string from which the magnet was parsed. + /// Used to format the magnet link back to a string. + query: String, + /// Name of the torrent, which may be empty unless + /// `magnet_force_name` crate feature is enabled. name: String, } @@ -97,7 +143,8 @@ impl MagnetLink { if s.contains('\n') { return Err(MagnetLinkError::InvalidURINewLine); } - let u = Url::parse(s)?; + + let u = Uri::parse(s.to_string())?; MagnetLink::from_url(&u) } @@ -110,8 +157,8 @@ impl MagnetLink { /// `urn:btmh:1220` for v2 infohash) /// - more than one hash of the same type was found /// - the hashes were not valid according to [`InfoHash::new`](crate::hash::InfoHash::new) - pub fn from_url(u: &Url) -> Result { - if u.scheme() != "magnet" { + pub fn from_url(u: &Uri) -> Result { + if u.scheme().as_str() != "magnet" { return Err(MagnetLinkError::InvalidScheme { scheme: u.scheme().to_string(), }); @@ -120,10 +167,23 @@ impl MagnetLink { let mut name = String::new(); let mut hashes: Vec = Vec::new(); - for (key, val) in u.query_pairs() { - // Deref cow into str then reference it - match &*key { + let query = u.query().ok_or(MagnetLinkError::InvalidURINoQuery)?; + for (key, val) in Self::unsafe_parse_query(query)? { + // magnets should not allow unescaped ? 
in query value + if val.as_str().contains('?') { + return Err(MagnetLinkError::InvalidURIQueryInterrogation); + } + + // magnets should not allow empty query values + if val.is_empty() { + return Err(MagnetLinkError::InvalidURIQueryEmptyValue { + key: key.as_str().to_string(), + }); + } + + match key.as_str() { "xt" => { + let val = val.as_str(); if val.starts_with("urn:btih:") { // Infohash v1 hashes.push(val.strip_prefix("urn:btih:").unwrap().to_string()); @@ -133,9 +193,22 @@ impl MagnetLink { } } "dn" => { - name.push_str(&val); + if !name.is_empty() { + return Err(MagnetLinkError::DuplicateName); + } + name = val + .decode() + .into_string()? + // fluent_uri explicitly does not decode U+002B (`+`) as a space + .replace('+', " ") + .to_owned(); + } + "tr" => { + // TODO: trackers + } + _ => { + continue; } - _ => continue, } } @@ -171,11 +244,40 @@ impl MagnetLink { }; Ok(MagnetLink { - name, hash: final_hash, + name: name.to_string(), + query: query.as_str().to_string(), }) } + /// Parse the query into a list of key->value entries with a percent-decoder attached. + /// + /// The results can be accessed raw with [EStr::as_str()] and percent-decoded with [EStr::decode]. + /// + /// As currently written this method never returns an error (`from_url` rejects a missing query itself), + /// but it may produce unexpected results because it does not apply magnet-specific sanitation. + /// + /// This method has a dangerous-sounding name because of percent-encoding. + /// If you aren't careful, you may end up with garbage data. This method + /// is not actually memory-unsafe. 
+ /// + /// For example: + /// + /// - a key without a value may be returned + /// - duplicate entries may be returned (such as a double magnet name) + /// - a value with an unencoded `?` may be returned + #[allow(clippy::type_complexity)] + pub fn unsafe_parse_query( + query: &EStr, + ) -> Result, &EStr)>, MagnetLinkError> { + let pairs: Vec<(&EStr, &EStr)> = query + .split('&') + .map(|s| s.split_once('=').unwrap_or((s, EStr::EMPTY))) + .collect(); + + Ok(pairs) + } + /// Returns the [`InfoHash`](crate::hash::InfoHash) contained in the MagnetLink pub fn hash(&self) -> &InfoHash { &self.hash @@ -195,6 +297,12 @@ impl MagnetLink { } } +impl std::fmt::Display for MagnetLink { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "magnet:?{}", self.query) + } +} + #[cfg(test)] mod tests { use super::*; @@ -350,4 +458,17 @@ mod tests { assert_eq!(res.unwrap_err(), MagnetLinkError::InvalidURINewLine,); } + + #[test] + fn survives_roundtrip() { + // Here we test that parsing a magnet then displaying it again + // will produce exactly the same output. + let magnet_url = + Uri::parse(std::fs::read_to_string("tests/bittorrent-v2-test.magnet").unwrap()) + .unwrap(); + let magnet = MagnetLink::from_url(&magnet_url).unwrap(); + + let magnet_str = magnet.to_string(); + assert_eq!(&magnet_url.to_string(), &magnet_str); + } } diff --git a/src/tracker.rs b/src/tracker.rs index c795dc3..d22a232 100644 --- a/src/tracker.rs +++ b/src/tracker.rs @@ -1,4 +1,4 @@ -use url::Url; +use fluent_uri::{ParseError as UriParseError, Uri}; /// A source of peers. Can be a [`Tracker`](crate::tracker::Tracker) or a decentralized source. #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] @@ -37,7 +37,7 @@ pub enum TrackerScheme { /// Error occurred during parsing a [`Tracker`](crate::tracker::Tracker). 
#[derive(Clone, Debug, PartialEq)] pub enum TrackerError { - InvalidURL { source: url::ParseError }, + InvalidURL { source: UriParseError }, InvalidScheme { scheme: String }, } @@ -59,9 +59,9 @@ impl std::error::Error for TrackerError { } } -impl From for TrackerError { - fn from(e: url::ParseError) -> TrackerError { - TrackerError::InvalidURL { source: e } +impl From<(UriParseError, Input)> for TrackerError { + fn from(e: (UriParseError, Input)) -> TrackerError { + TrackerError::InvalidURL { source: e.0 } } } @@ -78,7 +78,7 @@ impl PeerSource { /// /// Only covers the Tracker variant. Other variants should be /// instantiated directly. - pub fn from_url(url: &Url) -> Result { + pub fn from_url(url: &Uri) -> Result { Ok(Tracker::from_url(url)?.to_peer_source()) } @@ -90,15 +90,15 @@ impl PeerSource { impl Tracker { /// Generate a new Tracker from a given string URL. pub fn new(url: &str) -> Result { - let url = Url::parse(url)?; + let url = Uri::parse(url.to_string())?; Tracker::from_url(&url) } /// Generate a new Tracker from a parsed URL. /// /// Will fail if scheme is not "http", "https", "wss" or "udp". - pub fn from_url(url: &Url) -> Result { - let scheme = match url.scheme() { + pub fn from_url(url: &Uri) -> Result { + let scheme = match url.scheme().as_str() { "http" | "https" => TrackerScheme::Http, "wss" => TrackerScheme::Websocket, "udp" => TrackerScheme::UDP,