From 8ea5a02205b06ebf2e1a34f5eb2474780b800fe7 Mon Sep 17 00:00:00 2001 From: konstin Date: Tue, 24 Sep 2024 13:47:26 +0200 Subject: [PATCH] Use vendored METADATA parsing --- Cargo.lock | 96 +-------- Cargo.toml | 1 - crates/pypi-types/src/metadata/metadata23.rs | 199 +++++++++++++++++++ crates/pypi-types/src/metadata/mod.rs | 10 +- crates/uv-publish/Cargo.toml | 2 +- crates/uv-publish/src/lib.rs | 10 +- 6 files changed, 214 insertions(+), 104 deletions(-) create mode 100644 crates/pypi-types/src/metadata/metadata23.rs diff --git a/Cargo.lock b/Cargo.lock index a038a90c3dfa2..c707253d7e21a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,18 +23,6 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "aho-corasick" version = "1.1.3" @@ -44,12 +32,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "allocator-api2" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" - [[package]] name = "anes" version = "0.1.6" @@ -599,16 +581,6 @@ dependencies = [ "encoding_rs", ] -[[package]] -name = "chumsky" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" -dependencies = [ - "hashbrown", - "stacker", -] - [[package]] name = "ciborium" version = "0.2.2" @@ -1481,10 +1453,6 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 
-dependencies = [ - "ahash", - "allocator-api2", -] [[package]] name = "heck" @@ -2719,15 +2687,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "psm" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" -dependencies = [ - "cc", -] - [[package]] name = "ptr_meta" version = "0.3.0" @@ -2859,21 +2818,6 @@ dependencies = [ "uv-normalize", ] -[[package]] -name = "python-pkginfo" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3f3f0d552c7efdde2b6898bf21b49c4e76b3e6071ff196dfe52109804db896" -dependencies = [ - "flate2", - "fs-err", - "mailparse", - "rfc2047-decoder", - "tar", - "thiserror", - "zip", -] - [[package]] name = "quinn" version = "0.11.5" @@ -3246,20 +3190,6 @@ dependencies = [ "rand", ] -[[package]] -name = "rfc2047-decoder" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e90a668c463c412c3118ae1883e18b53d812c349f5af7a06de3ba4bb0c17cc73" -dependencies = [ - "base64 0.21.7", - "charset", - "chumsky", - "memchr", - "quoted_printable", - "thiserror", -] - [[package]] name = "rgb" version = "0.8.50" @@ -3766,19 +3696,6 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -[[package]] -name = "stacker" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - [[package]] name = "strict-num" version = "0.1.1" @@ -3893,17 +3810,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eddb6b06d20fba9ed21fca3d696ee1b6e870bca0bcf9fa2971f6ae2436de576a" -[[package]] -name = "tar" -version = "0.4.41" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909" -dependencies = [ - "filetime", - "libc", - "xattr", -] - [[package]] name = "target-lexicon" version = "0.12.16" @@ -5111,7 +5017,7 @@ dependencies = [ "insta", "itertools 0.13.0", "krata-tokio-tar", - "python-pkginfo", + "pypi-types", "reqwest", "reqwest-middleware", "rustc-hash", diff --git a/Cargo.toml b/Cargo.toml index 58b8d408537ca..8e0c846977307 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,7 +120,6 @@ proc-macro2 = { version = "1.0.86" } pubgrub = { git = "https://github.com/astral-sh/pubgrub", rev = "388685a8711092971930986644cfed152d1a1f6c" } pyo3 = { version = "0.21.2" } pyo3-log = { version = "0.10.0" } -python-pkginfo = { version = "0.6.3" } quote = { version = "1.0.37" } rayon = { version = "1.10.0" } reflink-copy = { version = "0.1.19" } diff --git a/crates/pypi-types/src/metadata/metadata23.rs b/crates/pypi-types/src/metadata/metadata23.rs new file mode 100644 index 0000000000000..d4d1d034e1eec --- /dev/null +++ b/crates/pypi-types/src/metadata/metadata23.rs @@ -0,0 +1,199 @@ +//! Vendored from the `python-pkginfo` crate, <https://github.com/PyO3/python-pkginfo-rs>. + +use crate::metadata::Headers; +use crate::MetadataError; +use std::str; +use std::str::FromStr; + +/// Core Metadata 2.3 as specified in +/// <https://packaging.python.org/specifications/core-metadata/>. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct Metadata23 { + /// Version of the file format; legal values are `1.0`, `1.1`, `1.2`, `2.1`, `2.2` and `2.3`. + pub metadata_version: String, + /// The name of the distribution. + pub name: String, + /// A string containing the distribution’s version number. + pub version: String, + /// A Platform specification describing an operating system supported by the distribution + /// which is not listed in the “Operating System” Trove classifiers.
+ pub platforms: Vec, + /// Binary distributions containing a PKG-INFO file will use the Supported-Platform field + /// in their metadata to specify the OS and CPU for which the binary distribution was compiled. + pub supported_platforms: Vec, + /// A one-line summary of what the distribution does. + pub summary: Option, + /// A longer description of the distribution that can run to several paragraphs. + pub description: Option, + /// A list of additional keywords, separated by commas, to be used to + /// assist searching for the distribution in a larger catalog. + pub keywords: Option, + /// A string containing the URL for the distribution’s home page. + pub home_page: Option, + /// A string containing the URL from which this version of the distribution can be downloaded. + pub download_url: Option, + /// A string containing the author’s name at a minimum; additional contact information may be provided. + pub author: Option, + /// A string containing the author’s e-mail address. It can contain a name and e-mail address in the legal forms for a RFC-822 `From:` header. + pub author_email: Option, + /// Text indicating the license covering the distribution where the license is not a selection from the `License` Trove classifiers or an SPDX license expression. + pub license: Option, + /// An SPDX expression indicating the license covering the distribution. + pub license_expression: Option, + /// Paths to files containing the text of the licenses covering the distribution. + pub license_files: Vec, + /// Each entry is a string giving a single classification value for the distribution. + pub classifiers: Vec, + /// Each entry contains a string naming some other distutils project required by this distribution. + pub requires_dist: Vec, + /// Each entry contains a string naming a Distutils project which is contained within this distribution. 
+ pub provides_dist: Vec, + /// Each entry contains a string describing a distutils project’s distribution which this distribution renders obsolete, + /// meaning that the two projects should not be installed at the same time. + pub obsoletes_dist: Vec, + /// A string containing the maintainer’s name at a minimum; additional contact information may be provided. + /// + /// Note that this field is intended for use when a project is being maintained by someone other than the original author: + /// it should be omitted if it is identical to `author`. + pub maintainer: Option, + /// A string containing the maintainer’s e-mail address. + /// It can contain a name and e-mail address in the legal forms for a RFC-822 `From:` header. + /// + /// Note that this field is intended for use when a project is being maintained by someone other than the original author: + /// it should be omitted if it is identical to `author_email`. + pub maintainer_email: Option, + /// This field specifies the Python version(s) that the distribution is guaranteed to be compatible with. + pub requires_python: Option, + /// Each entry contains a string describing some dependency in the system on which the distribution is to be used. + pub requires_external: Vec, + /// Each entry is a string containing a browsable URL for the project and a label for it, separated by a comma. + pub project_urls: Vec, + /// Each entry is a string containing the name of an optional feature. Must be a valid Python identifier. + /// May be used to make a dependency conditional on whether the optional feature has been requested. + pub provides_extras: Vec, + /// A string stating the markup syntax (if any) used in the distribution’s description, + /// so that tools can intelligently render the description. + pub description_content_type: Option, + /// Each entry is a string containing the name of another core metadata field.
+ pub dynamic: Vec, +} + +impl Metadata23 { + /// Parse distribution metadata from the raw bytes of a `METADATA` or `PKG-INFO` file. + pub fn parse(content: &[u8]) -> Result { + let headers = Headers::parse(content)?; + + let metadata_version = headers + .get_first_value("Metadata-Version") + .ok_or(MetadataError::FieldNotFound("Metadata-Version"))?; + let name = headers + .get_first_value("Name") + .ok_or(MetadataError::FieldNotFound("Name"))?; + let version = headers + .get_first_value("Version") + .ok_or(MetadataError::FieldNotFound("Version"))?; + let platforms = headers.get_all_values("Platform").collect(); + let supported_platforms = headers.get_all_values("Supported-Platform").collect(); + let summary = headers.get_first_value("Summary"); + let body = str::from_utf8(&content[headers.body_start..]) + .map_err(MetadataError::DescriptionEncoding)?; + let description = if body.trim().is_empty() { + headers.get_first_value("Description") + } else { + Some(body.to_string()) + }; + let keywords = headers.get_first_value("Keywords"); + let home_page = headers.get_first_value("Home-Page"); + let download_url = headers.get_first_value("Download-URL"); + let author = headers.get_first_value("Author"); + let author_email = headers.get_first_value("Author-email"); + let license = headers.get_first_value("License"); + let license_expression = headers.get_first_value("License-Expression"); + let license_files = headers.get_all_values("License-File").collect(); + let classifiers = headers.get_all_values("Classifier").collect(); + let requires_dist = headers.get_all_values("Requires-Dist").collect(); + let provides_dist = headers.get_all_values("Provides-Dist").collect(); + let obsoletes_dist = headers.get_all_values("Obsoletes-Dist").collect(); + let maintainer = headers.get_first_value("Maintainer"); + let maintainer_email = headers.get_first_value("Maintainer-email"); + let requires_python = headers.get_first_value("Requires-Python"); + let requires_external =
headers.get_all_values("Requires-External").collect(); + let project_urls = headers.get_all_values("Project-URL").collect(); + let provides_extras = headers.get_all_values("Provides-Extra").collect(); + let description_content_type = headers.get_first_value("Description-Content-Type"); + let dynamic = headers.get_all_values("Dynamic").collect(); + Ok(Metadata23 { + metadata_version, + name, + version, + platforms, + supported_platforms, + summary, + description, + keywords, + home_page, + download_url, + author, + author_email, + license, + license_expression, + license_files, + classifiers, + requires_dist, + provides_dist, + obsoletes_dist, + maintainer, + maintainer_email, + requires_python, + requires_external, + project_urls, + provides_extras, + description_content_type, + dynamic, + }) + } +} + +impl FromStr for Metadata23 { + type Err = MetadataError; + + fn from_str(s: &str) -> Result { + Metadata23::parse(s.as_bytes()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::MetadataError; + + #[test] + fn test_parse_from_str() { + let s = "Metadata-Version: 1.0"; + let meta: Result = s.parse(); + assert!(matches!(meta, Err(MetadataError::FieldNotFound("Name")))); + + let s = "Metadata-Version: 1.0\nName: asdf"; + let meta = Metadata23::parse(s.as_bytes()); + assert!(matches!(meta, Err(MetadataError::FieldNotFound("Version")))); + + let s = "Metadata-Version: 1.0\nName: asdf\nVersion: 1.0"; + let meta = Metadata23::parse(s.as_bytes()).unwrap(); + assert_eq!(meta.metadata_version, "1.0"); + assert_eq!(meta.name, "asdf"); + assert_eq!(meta.version, "1.0"); + + let s = "Metadata-Version: 1.0\nName: asdf\nVersion: 1.0\nDescription: a Python package"; + let meta: Metadata23 = s.parse().unwrap(); + assert_eq!(meta.description.as_deref(), Some("a Python package")); + + let s = "Metadata-Version: 1.0\nName: asdf\nVersion: 1.0\n\na Python package"; + let meta: Metadata23 = s.parse().unwrap(); + assert_eq!(meta.description.as_deref(), Some("a Python 
package")); + + let s = "Metadata-Version: 1.0\nName: asdf\nVersion: 1.0\nAuthor: 中文\n\n一个 Python 包"; + let meta: Metadata23 = s.parse().unwrap(); + assert_eq!(meta.author.as_deref(), Some("中文")); + assert_eq!(meta.description.as_deref(), Some("一个 Python 包")); + } +} diff --git a/crates/pypi-types/src/metadata/mod.rs b/crates/pypi-types/src/metadata/mod.rs index cb0ea7cd017cd..cbcbaa47c4476 100644 --- a/crates/pypi-types/src/metadata/mod.rs +++ b/crates/pypi-types/src/metadata/mod.rs @@ -1,5 +1,6 @@ mod metadata10; mod metadata12; +mod metadata23; mod metadata_resolver; mod pyproject_toml; mod requires_txt; @@ -14,6 +15,7 @@ use uv_normalize::InvalidNameError; pub use metadata10::Metadata10; pub use metadata12::Metadata12; +pub use metadata23::Metadata23; pub use metadata_resolver::MetadataResolver; pub use pyproject_toml::RequiresDist; pub use requires_txt::RequiresTxt; @@ -65,13 +67,17 @@ impl From> for MetadataError { #[derive(Debug)] struct Headers<'a> { headers: Vec>, + body_start: usize, } impl<'a> Headers<'a> { /// Parse the headers from the given metadata file content. fn parse(content: &'a [u8]) -> Result { - let (headers, _) = mailparse::parse_headers(content)?; - Ok(Self { headers }) + let (headers, body_start) = mailparse::parse_headers(content)?; + Ok(Self { + headers, + body_start, + }) } /// Return the first value associated with the header with the given name. 
diff --git a/crates/uv-publish/Cargo.toml b/crates/uv-publish/Cargo.toml index 8f27b030ff191..6e186b978dd33 100644 --- a/crates/uv-publish/Cargo.toml +++ b/crates/uv-publish/Cargo.toml @@ -11,6 +11,7 @@ license.workspace = true [dependencies] distribution-filename = { workspace = true } +pypi-types = { workspace = true } uv-client = { workspace = true } uv-fs = { workspace = true } uv-metadata = { workspace = true } @@ -22,7 +23,6 @@ futures = { workspace = true } glob = { workspace = true } itertools = { workspace = true } krata-tokio-tar = { workspace = true } -python-pkginfo = { workspace = true } reqwest = { workspace = true } reqwest-middleware = { workspace = true } rustc-hash = { workspace = true } diff --git a/crates/uv-publish/src/lib.rs b/crates/uv-publish/src/lib.rs index 00f18ba408ab8..ab23970b9a14d 100644 --- a/crates/uv-publish/src/lib.rs +++ b/crates/uv-publish/src/lib.rs @@ -5,7 +5,7 @@ use fs_err::File; use futures::TryStreamExt; use glob::{glob, GlobError, PatternError}; use itertools::Itertools; -use python_pkginfo::Metadata; +use pypi_types::{Metadata23, MetadataError}; use reqwest::header::AUTHORIZATION; use reqwest::multipart::Part; use reqwest::{Body, Response, StatusCode}; @@ -46,12 +46,12 @@ pub enum PublishError { /// Failure to get the metadata for a specific file. 
#[derive(Error, Debug)] pub enum PublishPrepareError { - #[error(transparent)] - PkgInfoError(#[from] python_pkginfo::Error), #[error(transparent)] Io(#[from] io::Error), #[error("Failed to read metadata")] Metadata(#[from] uv_metadata::Error), + #[error("Failed to read metadata")] + Metadata23(#[from] MetadataError), #[error("Only files ending in `.tar.gz` are valid source distributions: `{0}`")] InvalidExtension(SourceDistFilename), #[error("No PKG-INFO file found")] @@ -292,7 +292,7 @@ async fn source_dist_pkg_info(file: &Path) -> Result, PublishPrepareErro } } -async fn metadata(file: &Path, filename: &DistFilename) -> Result { +async fn metadata(file: &Path, filename: &DistFilename) -> Result { let contents = match filename { DistFilename::SourceDistFilename(source_dist) => { if source_dist.extension != SourceDistExtension::TarGz { @@ -308,7 +308,7 @@ async fn metadata(file: &Path, filename: &DistFilename) -> Result