Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- `MagnetLink` now refuses to parse strings that contain a newline (`\n`), producing
a `MagnetLinkError::InvalidURINewLine` error
- `MagnetLink::from_url`, `PeerSource::from_url`, and `Tracker::from_url` now take a
`&fluent_uri::Uri<String>` instead of a `&url::Url`
- all error types with an `InvalidURI` or `InvalidURL` variant now have
`fluent_uri::ParseError` as source instead of `url::ParseError`

### Added

- `MagnetLink` implements `Display`, so it can be converted to a string again
using `MagnetLink::to_string`.
- `MagnetLink::unsafe_parse_query` allows iterating over the raw key/value pairs of a
magnet link query, without any magnet-specific validation (use with care)
- Added new `MagnetLinkError` variants to be more precise about what's wrong with
a parsed magnet link.

## Version 0.3.2 (2025-08-29)

Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ sha1 = "0.10"
sha256 = "1.5"
rustc-hex = "2.1"
serde = { version = "1", features = [ "derive" ] }
url = "2.5"
fluent-uri = { git = "https://github.com/yescallop/fluent-uri-rs", rev = "5ad3b65" }

[dev-dependencies]
serde_json = "1"
Expand Down
155 changes: 138 additions & 17 deletions src/magnet.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
use url::Url;
use fluent_uri::pct_enc::{encoder::Query, EStr};
use fluent_uri::{ParseError as UriParseError, Uri};

use crate::{InfoHash, InfoHashError, TorrentID};

use std::string::FromUtf8Error;

/// Error occurred during parsing a [`MagnetLink`](crate::magnet::MagnetLink).
#[derive(Clone, Debug, PartialEq)]
pub enum MagnetLinkError {
/// The URI was not valid according to [`Url::parse`](url::Url::parse).
InvalidURI { source: url::ParseError },
/// The URI was not valid according to [`Uri::parse`](fluent_uri::Uri::parse).
InvalidURI { source: UriParseError },
/// The URI does not contain a query.
InvalidURINoQuery,
/// The URI query contains non-UTF8 chars
InvalidURIQueryUnicode { source: FromUtf8Error },
/// The URI query contains a key without a value
InvalidURIQueryEmptyValue { key: String },
/// The URI query contains a non-urlencoded `?` beyond the query declaration
InvalidURIQueryInterrogation,
/// The URI contains a newline
InvalidURINewLine,
/// The URI scheme was not `magnet`
Expand All @@ -19,6 +30,8 @@ pub enum MagnetLinkError {
InvalidHash { source: InfoHashError },
/// Too many hashes were found in the magnet URI, expected two at most.
TooManyHashes { number: usize },
/// There were two or more `dn` declarations in the magnet query.
DuplicateName,
/// No name was contained in the magnet URI. This is technically allowed by
/// some implementations, but should not be encouraged/supported.
#[cfg(feature = "magnet_force_name")]
Expand All @@ -31,6 +44,18 @@ impl std::fmt::Display for MagnetLinkError {
MagnetLinkError::InvalidURI { source } => {
write!(f, "Invalid URI: {source}")
}
MagnetLinkError::InvalidURINoQuery => {
write!(f, "Invalid URI: no query string")
}
MagnetLinkError::InvalidURIQueryEmptyValue { key } => {
write!(f, "Invalid URI: query has key {key} with no value")
}
MagnetLinkError::InvalidURIQueryUnicode { .. } => {
write!(f, "Invalid URI: the query part contains non-utf8 chars")
}
MagnetLinkError::InvalidURIQueryInterrogation => {
write!(f, "Invalid URI: the query part should only contain one `?`")
}
MagnetLinkError::InvalidURINewLine => {
write!(f, "Invalid URI: newlines are not allowed in magnet links")
}
Expand All @@ -46,6 +71,12 @@ impl std::fmt::Display for MagnetLinkError {
MagnetLinkError::TooManyHashes { number } => {
write!(f, "Too many hashes ({number})")
}
MagnetLinkError::DuplicateName => {
write!(
f,
"Too many name declarations for the magnet, only expecting one."
)
}
#[cfg(feature = "magnet_force_name")]
MagnetLinkError::NoNameFound => {
write!(f, "No name found")
Expand All @@ -60,9 +91,15 @@ impl From<InfoHashError> for MagnetLinkError {
}
}

impl From<url::ParseError> for MagnetLinkError {
fn from(e: url::ParseError) -> MagnetLinkError {
MagnetLinkError::InvalidURI { source: e }
/// Lets `?` turn an `(error, input)` pair, as produced by a failed
/// [`Uri::parse`](fluent_uri::Uri::parse), into a [`MagnetLinkError::InvalidURI`].
///
/// The second tuple member is discarded; only the parse error is kept as `source`.
impl<Input> From<(UriParseError, Input)> for MagnetLinkError {
    fn from((source, _rejected): (UriParseError, Input)) -> MagnetLinkError {
        MagnetLinkError::InvalidURI { source }
    }
}

/// Wraps a UTF-8 conversion failure as
/// [`MagnetLinkError::InvalidURIQueryUnicode`], keeping the original error as `source`.
impl From<FromUtf8Error> for MagnetLinkError {
    fn from(source: FromUtf8Error) -> MagnetLinkError {
        MagnetLinkError::InvalidURIQueryUnicode { source }
    }
}

Expand All @@ -71,6 +108,7 @@ impl std::error::Error for MagnetLinkError {
match self {
MagnetLinkError::InvalidURI { source } => Some(source),
MagnetLinkError::InvalidHash { source } => Some(source),
// MagnetLinkError::InvalidURIQueryUnicode { source } => Some(source),
_ => None,
}
}
Expand All @@ -80,11 +118,19 @@ impl std::error::Error for MagnetLinkError {
///
/// The MagnetLink can provide information about the torrent
/// [`name`](crate::magnet::MagnetLink::name) and [`hash`](crate::magnet::MagnetLink::hash).
/// Other fields can be contained in the magnet URI, as explained [on Wikipedia](https://en.wikipedia.org/wiki/Magnet_URI_scheme). However,
/// they are currently not exposed by this library.
///
/// More information is specified in [BEP-0009](https://bittorrent.org/beps/bep_0009.html), and
/// even more appears in the wild, as explained [on Wikipedia](https://en.wikipedia.org/wiki/Magnet_URI_scheme).
#[derive(Clone, Debug)]
pub struct MagnetLink {
/// Infohash of the torrent. This is the only mandatory field for magnet
/// link parsing, unless the `magnet_force_name` crate feature is enabled.
hash: InfoHash,
/// Raw query string (the part after `magnet:?`) from which the magnet was
/// parsed, stored as-is without percent-decoding. The `Display`
/// implementation writes it back verbatim to format the magnet link as a
/// string again.
query: String,
/// Name of the torrent, taken from the `dn` query parameter after
/// percent-decoding and replacing `+` with a space. It may be empty unless
/// the `magnet_force_name` crate feature is enabled.
name: String,
}

Expand All @@ -97,7 +143,8 @@ impl MagnetLink {
if s.contains('\n') {
return Err(MagnetLinkError::InvalidURINewLine);
}
let u = Url::parse(s)?;

let u = Uri::parse(s.to_string())?;
MagnetLink::from_url(&u)
}

Expand All @@ -110,8 +157,8 @@ impl MagnetLink {
/// `urn:btmh:1220` for v2 infohash)
/// - more than one hash of the same type was found
/// - the hashes were not valid according to [`InfoHash::new`](crate::hash::InfoHash::new)
pub fn from_url(u: &Url) -> Result<MagnetLink, MagnetLinkError> {
if u.scheme() != "magnet" {
pub fn from_url(u: &Uri<String>) -> Result<MagnetLink, MagnetLinkError> {
if u.scheme().as_str() != "magnet" {
return Err(MagnetLinkError::InvalidScheme {
scheme: u.scheme().to_string(),
});
Expand All @@ -120,10 +167,23 @@ impl MagnetLink {
let mut name = String::new();
let mut hashes: Vec<String> = Vec::new();

for (key, val) in u.query_pairs() {
// Deref cow into str then reference it
match &*key {
let query = u.query().ok_or(MagnetLinkError::InvalidURINoQuery)?;
for (key, val) in Self::unsafe_parse_query(query)? {
// magnets should not allow unescaped ? in query value
if val.as_str().contains('?') {
return Err(MagnetLinkError::InvalidURIQueryInterrogation);
}

// magnets should not allow empty query values
if val.is_empty() {
return Err(MagnetLinkError::InvalidURIQueryEmptyValue {
key: key.as_str().to_string(),
});
}

match key.as_str() {
"xt" => {
let val = val.as_str();
if val.starts_with("urn:btih:") {
// Infohash v1
hashes.push(val.strip_prefix("urn:btih:").unwrap().to_string());
Expand All @@ -133,9 +193,22 @@ impl MagnetLink {
}
}
"dn" => {
name.push_str(&val);
if !name.is_empty() {
return Err(MagnetLinkError::DuplicateName);
}
name = val
.decode()
.into_string()?
// fluent_uri explicitly does not decode U+002B (`+`) as a space
.replace('+', " ")
.to_owned();
}
"tr" => {
// TODO: trackers
}
_ => {
continue;
}
_ => continue,
}
}

Expand Down Expand Up @@ -171,11 +244,40 @@ impl MagnetLink {
};

Ok(MagnetLink {
name,
hash: final_hash,
name: name.to_string(),
query: query.as_str().to_string(),
})
}

/// Split a magnet query into raw `(key, value)` entries, in order of appearance.
///
/// Entries are separated on `&`, and each entry is split on its first `=`.
/// Both halves are returned still percent-encoded: they can be accessed raw with
/// [`EStr::as_str`] and percent-decoded with [`EStr::decode`].
///
/// # Errors
///
/// This method currently never fails and always returns `Ok`. The `Result` return
/// type is kept so that the signature stays stable for existing callers.
///
/// # Caveats
///
/// This method has a dangerous-sounding name because of percent-encoding.
/// If you aren't careful, you may end up with garbage data. This method
/// is not actually memory-unsafe.
///
/// No magnet-specific sanitation is applied, so unexpected results are possible.
/// For example:
///
/// - a key without a value may be returned (its value is then empty)
/// - duplicate entries may be returned (such as a double magnet name)
/// - a value with an unencoded `?` may be returned
// The tuple-of-references return type is part of the public signature, hence the allow.
#[allow(clippy::type_complexity)]
pub fn unsafe_parse_query(
    query: &EStr<Query>,
) -> Result<Vec<(&EStr<Query>, &EStr<Query>)>, MagnetLinkError> {
    Ok(query
        .split('&')
        // An entry without `=` is reported as a key with an empty value.
        .map(|entry| entry.split_once('=').unwrap_or((entry, EStr::EMPTY)))
        .collect())
}

/// Returns the [`InfoHash`](crate::hash::InfoHash) contained in the MagnetLink
pub fn hash(&self) -> &InfoHash {
&self.hash
Expand All @@ -195,6 +297,12 @@ impl MagnetLink {
}
}

/// Formats the magnet link as `magnet:?` followed by the stored query string.
impl std::fmt::Display for MagnetLink {
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str("magnet:?")?;
        out.write_str(&self.query)
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -350,4 +458,17 @@ mod tests {

assert_eq!(res.unwrap_err(), MagnetLinkError::InvalidURINewLine,);
}

#[test]
fn survives_roundtrip() {
    // Round-trip check: a magnet parsed from the fixture file and then
    // formatted back must be identical to the URI it was parsed from.
    let raw = std::fs::read_to_string("tests/bittorrent-v2-test.magnet").unwrap();
    let parsed_uri = Uri::parse(raw).unwrap();
    let magnet = MagnetLink::from_url(&parsed_uri).unwrap();

    assert_eq!(&parsed_uri.to_string(), &magnet.to_string());
}
}
18 changes: 9 additions & 9 deletions src/tracker.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use url::Url;
use fluent_uri::{ParseError as UriParseError, Uri};

/// A source of peers. Can be a [`Tracker`](crate::tracker::Tracker) or a decentralized source.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
Expand Down Expand Up @@ -37,7 +37,7 @@ pub enum TrackerScheme {
/// Error occurred during parsing a [`Tracker`](crate::tracker::Tracker).
#[derive(Clone, Debug, PartialEq)]
pub enum TrackerError {
InvalidURL { source: url::ParseError },
InvalidURL { source: UriParseError },
InvalidScheme { scheme: String },
}

Expand All @@ -59,9 +59,9 @@ impl std::error::Error for TrackerError {
}
}

impl From<url::ParseError> for TrackerError {
fn from(e: url::ParseError) -> TrackerError {
TrackerError::InvalidURL { source: e }
/// Lets `?` turn an `(error, input)` pair, as produced by a failed
/// [`Uri::parse`](fluent_uri::Uri::parse), into a [`TrackerError::InvalidURL`].
///
/// The second tuple member is discarded; only the parse error is kept as `source`.
impl<Input> From<(UriParseError, Input)> for TrackerError {
    fn from((source, _rejected): (UriParseError, Input)) -> TrackerError {
        TrackerError::InvalidURL { source }
    }
}

Expand All @@ -78,7 +78,7 @@ impl PeerSource {
///
/// Only covers the Tracker variant. Other variants should be
/// instantiated directly.
pub fn from_url(url: &Url) -> Result<PeerSource, TrackerError> {
pub fn from_url(url: &Uri<String>) -> Result<PeerSource, TrackerError> {
Ok(Tracker::from_url(url)?.to_peer_source())
}

Expand All @@ -90,15 +90,15 @@ impl PeerSource {
impl Tracker {
/// Generate a new Tracker from a given string URL.
pub fn new(url: &str) -> Result<Tracker, TrackerError> {
let url = Url::parse(url)?;
let url = Uri::parse(url.to_string())?;
Tracker::from_url(&url)
}

/// Generate a new Tracker from a parsed URL.
///
/// Will fail if scheme is not "http", "https", "wss" or "udp".
pub fn from_url(url: &Url) -> Result<Tracker, TrackerError> {
let scheme = match url.scheme() {
pub fn from_url(url: &Uri<String>) -> Result<Tracker, TrackerError> {
let scheme = match url.scheme().as_str() {
"http" | "https" => TrackerScheme::Http,
"wss" => TrackerScheme::Websocket,
"udp" => TrackerScheme::UDP,
Expand Down