Skip to content

Commit 0797f86

Browse files
committed
feat: Read torrent content files
1 parent 4c5ea80 commit 0797f86

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+521
-8
lines changed

CHANGELOG.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## UNRELEASED (YYYY-MM-DD)
9+
10+
This release focuses on listing the files contained in torrents. This is not implemented for magnet links, but is implemented for `TorrentFile` and will be implemented in [hightorrent_api](https://github.com/angrynode/hightorrent_api) for the qBittorrent backend.
11+
12+
### Added
13+
14+
- `DecodedInfo.piece_length` contains the torrent piece length in bytes, with a maximum supported size of `536854528` like in libtorrent
15+
- `TorrentContent` represents a file in a torrent; `ToTorrentContent` is a trait enabling specialized representations to be turned into a backend-agnostic `TorrentContent`; padding files are ignored when producing a list of content files
16+
- `DecodedTorrent::files()` produces the file list in the torrent (only v1 torrents supported for now)
17+
18+
### Changed
19+
20+
- Not having a `piece length` info field in a torrent produces an error; so does having a piece length exceeding `536854528` bytes
21+
- Having `/` or `..` in a path segment of a content file produces a `TorrentFileError::InvalidContentPath`
22+
23+
### Meta
24+
25+
- Added more test cases from arvidn/libtorrent to make sure we don't allow parsing invalid torrents
26+
827
## Version 0.2.0 (2024-09-02)
928

1029
### Added

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ Contributions are welcome. Here are the steps to make sure your contribution get
3939

4040
If you don't have those dependencies (`just`, `cargo-rdme`), you can setup a temporary development environment with [Nix](https://nixos.org/) by running `nix develop`.
4141

42+
# Running tests
43+
44+
From the repository root, run `cargo test`. To run the more advanced checks that CI performs with Rust nightly, run `scripts/pre-commit.sh`. To run the test verifying that error cases from libtorrent are properly handled (ignored by default), run `cargo test -- --ignored`.
45+
4246
# Possible improvements for v1
4347

4448
- [x] hand-implement errors to remove snafu dependency
@@ -48,6 +52,7 @@ If you don't have those dependencies (`just`, `cargo-rdme`), you can setup a tem
4852
- [ ] implement MultiTarget filtering, including boolean logic (AND/OR/XOR)
4953
- [ ] provide more information for TorrentFile (eg. files list)
5054
- [ ] consider replacing Torrent with a trait
55+
- [ ] implement more libtorrent tests (28/41 wrongful successes as of 2025-03-28)
5156

5257
# License
5358

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ mod magnet;
3434
pub use magnet::{MagnetLink, MagnetLinkError};
3535

3636
mod torrent;
37-
pub use torrent::{ToTorrent, Torrent};
37+
pub use torrent::{ToTorrent, ToTorrentContent, Torrent, TorrentContent};
3838

3939
mod torrent_file;
4040
pub use torrent_file::{TorrentFile, TorrentFileError};

src/torrent.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use serde::Deserialize;
22

33
use crate::{InfoHash, TorrentID};
44

5+
use std::path::PathBuf;
6+
57
/// Turn a backend-specific torrent into an agnostic [`Torrent`](crate::torrent::Torrent).
68
pub trait ToTorrent {
79
fn to_torrent(&self) -> Torrent;
@@ -11,7 +13,6 @@ pub trait ToTorrent {
1113
/// An abstract torrent, loaded from any backend that implements
1214
/// [ToTorrent](crate::torrent::ToTorrent).
1315
pub struct Torrent {
14-
//pub hash: TruncatedHash,
1516
pub name: String,
1617
pub path: String,
1718
pub date_start: i64,
@@ -47,3 +48,30 @@ impl Torrent {
4748
}
4849
}
4950
}
51+
52+
pub trait ToTorrentContent {
53+
fn to_torrent_content(&self) -> TorrentContent;
54+
}
55+
56+
/// A file contained inside a [Torrent].
57+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)]
58+
pub struct TorrentContent {
59+
/// File path, relative from the torrent root.
60+
pub path: PathBuf,
61+
/// Size of the file in bytes,
62+
pub size: u64,
63+
}
64+
65+
impl std::cmp::PartialOrd for TorrentContent {
66+
// Sort by alphabetical order
67+
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
68+
Some(self.cmp(other))
69+
}
70+
}
71+
72+
impl std::cmp::Ord for TorrentContent {
73+
// Sort by alphabetical order
74+
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
75+
self.path.cmp(&other.path)
76+
}
77+
}

src/torrent_error.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ pub enum TorrentError {
4848
MismatchedMagnetHashType { hash: String, hash_type: String },
4949
#[snafu(display("Unsupported magnet hash type: {}", hash_type))]
5050
UnsupportedMagnetHashType { hash_type: String },
51+
#[snafu(display("Invalid content file path in torrent: {}", String))]
52+
InvalidContentPath { path: String },
5153
}
5254

5355
impl From<crate::info_hash::InfoHashError> for TorrentError {

src/torrent_file.rs

Lines changed: 213 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,14 @@ use serde::{Deserialize, Serialize};
44
use sha1::{Digest, Sha1};
55

66
use std::collections::HashMap;
7+
use std::path::PathBuf;
78

8-
use crate::{InfoHash, InfoHashError, TorrentID};
9+
use crate::{InfoHash, InfoHashError, TorrentContent, TorrentID};
10+
11+
/// Upper bound accepted for the `piece length` entry of the info dict in V2 torrents.
///
/// The value equals `2^29 - 2^14` bytes. It is a magic number copied
/// [from libtorrent](https://github.com/arvidn/libtorrent/blob/1b9dc7462f22bc1513464d01c72281280a6a5f97/include/libtorrent/file_storage.hpp#L246).
pub const MAXIMUM_PIECE_LENGTH: u32 = 536_854_528;
915

1016
/// Error occurred during parsing a [`TorrentFile`](crate::torrent_file::TorrentFile).
1117
#[derive(Clone, Debug, PartialEq)]
@@ -16,6 +22,9 @@ pub enum TorrentFileError {
1622
NotATorrent { reason: String },
1723
WrongVersion { version: u64 },
1824
InvalidHash { source: InfoHashError },
25+
InvalidContentPath { path: String },
26+
MissingPieceLength,
27+
BadPieceLength { piece_length: u32 },
1928
}
2029

2130
impl std::fmt::Display for TorrentFileError {
@@ -32,6 +41,15 @@ impl std::fmt::Display for TorrentFileError {
3241
"Wrong torrent version: {version}, only v1 and v2 are supported)"
3342
),
3443
TorrentFileError::InvalidHash { source } => write!(f, "Invalid hash: {source}"),
44+
TorrentFileError::InvalidContentPath { path } => {
45+
write!(f, "Invalid content file path in torrent: {path}")
46+
}
47+
TorrentFileError::MissingPieceLength => {
48+
write!(f, "No \'piece length\' field found in info dict")
49+
}
50+
TorrentFileError::BadPieceLength { piece_length } => {
51+
write!(f, "Torrent \'piece length\' is too big: {}", piece_length)
52+
}
3553
}
3654
}
3755
}
@@ -66,12 +84,12 @@ impl std::error::Error for TorrentFileError {
6684
/// [`name`](crate::torrent_file::TorrentFile::name) and
6785
/// [`hash`](crate::torrent_file::TorrentFile::hash). Other fields could be supported, but are not
6886
/// currently implemented by this library.
69-
///
70-
/// TODO: Implement files() method to return list of files
7187
#[derive(Clone, Debug, Serialize, Deserialize)]
7288
pub struct TorrentFile {
73-
hash: InfoHash,
74-
name: String,
89+
pub hash: InfoHash,
90+
pub name: String,
91+
// Kept for further analysis
92+
pub decoded: DecodedTorrent,
7593
}
7694

7795
/// A parsed bencode-decoded value, to ensure torrent-like structure.
@@ -89,6 +107,98 @@ pub struct DecodedTorrent {
89107
extra: HashMap<String, BencodeValue>,
90108
}
91109

110+
impl DecodedTorrent {
111+
pub fn files(&self) -> Result<Vec<TorrentContent>, TorrentFileError> {
112+
if self.info.files.is_none() {
113+
if self.info.file_tree.is_none() {
114+
// V1 torrent with single file
115+
Ok(vec![TorrentContent {
116+
path: PathBuf::from(&self.info.name),
117+
size: self.info.length.unwrap(),
118+
}])
119+
} else {
120+
todo!("v2 torrent files");
121+
}
122+
} else {
123+
// V1 torrent with multiple files
124+
let mut files: Vec<TorrentContent> = vec![];
125+
for file in self.info.files.as_ref().unwrap() {
126+
// TODO: error
127+
let f: UnsafeV1FileContent = bt_bencode::from_value(file.clone()).unwrap();
128+
if let Some(parsed_file) = f.to_torrent_content()? {
129+
files.push(parsed_file);
130+
}
131+
}
132+
133+
// Sort files by alphabetical order
134+
files.sort();
135+
Ok(files)
136+
}
137+
}
138+
}
139+
140+
/// Raw file path described within a Bittorrent v1 torrent file.
141+
///
142+
/// It has not been sanitized, for example to prevent path traversal attacks. You should not be using this in your API;
143+
/// use [TorrentContent] instead.
144+
#[derive(Deserialize, Serialize, Debug, PartialEq, Clone)]
145+
pub struct UnsafeV1FileContent {
146+
/// Raw path segments from the torrent, may contain directory escapes (like `..`)
147+
#[serde(rename = "path")]
148+
pub raw_paths: Vec<String>,
149+
/// File length in bytes
150+
pub length: u64,
151+
/// Extended file attributes as defined in [BEP-0047](https://www.bittorrent.org/beps/bep_0047.html)
152+
///
153+
/// Can contain several characters:
154+
///
155+
/// - p for padding files
156+
/// - l for symlinks
157+
/// - x for executables
158+
/// - h for hidden files
159+
#[serde(default)]
160+
pub attr: String,
161+
}
162+
163+
impl UnsafeV1FileContent {
164+
/// Tries to parse [TorrentContent].
165+
///
166+
/// Fails if the data is invalid (eg. path traversal), produces
167+
/// Ok(None) when the file is a padding file.
168+
pub fn to_torrent_content(&self) -> Result<Option<TorrentContent>, TorrentFileError> {
169+
if self.attr.contains('p') {
170+
return Ok(None);
171+
}
172+
173+
// Parse the raw path parts omitting weird directory shenanigans
174+
let mut path = PathBuf::new();
175+
for p in &self.raw_paths {
176+
if p.contains('/') {
177+
return Err(TorrentFileError::InvalidContentPath {
178+
path: p.to_string(),
179+
});
180+
}
181+
182+
if p == ".." {
183+
return Err(TorrentFileError::InvalidContentPath {
184+
path: p.to_string(),
185+
});
186+
}
187+
188+
if p == "." {
189+
continue;
190+
}
191+
192+
path.push(p);
193+
}
194+
195+
Ok(Some(TorrentContent {
196+
path,
197+
size: self.length,
198+
}))
199+
}
200+
}
201+
92202
/// An info dict contained in a [`DecodedTorrent`](crate::torrent_file::DecodedTorrent).
93203
///
94204
/// Only cares about torrent version, name, and files, but other fields are preseved in an `extra`
@@ -103,6 +213,10 @@ pub struct DecodedInfo {
103213

104214
name: String,
105215

216+
/// Torrent `piece length` as used in v1/v2 torrents
217+
#[serde(rename = "piece length")]
218+
piece_length: u32,
219+
106220
// Torrent v1/hybrid (only for single-file torrents)
107221
#[serde(skip_serializing_if = "Option::is_none")]
108222
length: Option<u64>,
@@ -173,9 +287,19 @@ impl TorrentFile {
173287
}
174288
};
175289

290+
// Sanitize piece length (TODO: make this in type)
291+
if let Some(2) = &torrent.info.version {
292+
if torrent.info.piece_length > MAXIMUM_PIECE_LENGTH {
293+
return Err(TorrentFileError::BadPieceLength {
294+
piece_length: torrent.info.piece_length,
295+
});
296+
}
297+
}
298+
176299
Ok(TorrentFile {
177-
name: torrent.info.name,
300+
name: torrent.info.name.clone(),
178301
hash: infohash,
302+
decoded: torrent,
179303
})
180304
}
181305

@@ -211,6 +335,75 @@ mod tests {
211335
torrent.hash,
212336
InfoHash::V1("c811b41641a09d192b8ed81b14064fff55d85ce3".to_string())
213337
);
338+
assert_eq!(torrent.decoded.files().unwrap().len(), 94);
339+
}
340+
341+
#[test]
fn can_read_torrent_v1_multifile() {
    // Multi-file v1 fixture: parsing must succeed and list both files in order.
    let bytes = std::fs::read("tests/libtorrent/good/sample.torrent").unwrap();
    let parsed = TorrentFile::from_slice(&bytes);
    println!("{:?}", parsed);
    assert!(parsed.is_ok());

    let torrent = parsed.unwrap();
    assert_eq!(&torrent.name, "sample");

    let expected_hash = InfoHash::V1("58d8d15a4eb3bd9afabc9cee2564f78192777edb".to_string());
    assert_eq!(torrent.hash, expected_hash);

    let expected_files = vec![
        TorrentContent {
            path: PathBuf::from("text_file.txt"),
            size: 20,
        },
        TorrentContent {
            path: PathBuf::from("text_file2.txt"),
            size: 25,
        },
    ];
    assert_eq!(torrent.decoded.files().unwrap(), expected_files);
}
367+
368+
#[test]
fn can_read_torrent_v1_wrongpath() {
    // The torrent itself parses, but listing its files must fail on the `..` segment.
    let bytes = std::fs::read("tests/libtorrent/good/parent_path.torrent").unwrap();
    let parsed = TorrentFile::from_slice(&bytes);
    println!("{:?}", parsed);
    assert!(parsed.is_ok());

    let torrent = parsed.unwrap();
    assert_eq!(&torrent.name, "temp");

    let expected_hash = InfoHash::V1("9e1111f1ee4966f7d06d398f1d58e00ad150657a".to_string());
    assert_eq!(torrent.hash, expected_hash);

    let expected_error = TorrentFileError::InvalidContentPath {
        path: "..".to_string(),
    };
    assert_eq!(torrent.decoded.files().unwrap_err(), expected_error);
}
387+
388+
#[test]
fn can_read_torrent_v1_singlepath() {
    // Single-file v1 fixture: the only listed file is named after the torrent.
    let bytes = std::fs::read("tests/libtorrent/good/base.torrent").unwrap();
    let parsed = TorrentFile::from_slice(&bytes);
    println!("{:?}", parsed);
    assert!(parsed.is_ok());

    let torrent = parsed.unwrap();
    assert_eq!(&torrent.name, "temp");

    let expected_hash = InfoHash::V1("c0fda1edafdbdbb96443424e0b3899af7159d10e".to_string());
    assert_eq!(torrent.hash, expected_hash);

    let expected_files = vec![TorrentContent {
        path: PathBuf::from("temp"),
        size: 425,
    }];
    assert_eq!(torrent.decoded.files().unwrap(), expected_files);
}
215408

216409
#[test]
@@ -243,4 +436,18 @@ mod tests {
243436
))
244437
);
245438
}
439+
440+
#[test]
fn v1_piece_len() {
    // Fixture from the `bad` set (named for a negative piece length): parsing must fail.
    let bytes = std::fs::read("tests/libtorrent/bad/negative_piece_len.torrent").unwrap();
    let outcome = TorrentFile::from_slice(&bytes);
    assert!(outcome.is_err());
}
446+
447+
#[test]
fn v2_piece_len() {
    // Fixture from the `bad` set (named for its v2 piece size): parsing must fail.
    let bytes = std::fs::read("tests/libtorrent/bad/v2_piece_size.torrent").unwrap();
    let outcome = TorrentFile::from_slice(&bytes);
    assert!(outcome.is_err());
}
246453
}

0 commit comments

Comments
 (0)