Skip to content

Commit

Permalink
Use SmallString for filenames and URLs (#11765)
Browse files Browse the repository at this point in the history
## Summary

These are never mutated, so there's no need to store them as `String`.
  • Loading branch information
charliermarsh authored Feb 25, 2025
1 parent 76c3caf commit 275db06
Show file tree
Hide file tree
Showing 11 changed files with 70 additions and 56 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/uv-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ uv-pep440 = { workspace = true }
uv-pep508 = { workspace = true }
uv-platform-tags = { workspace = true }
uv-pypi-types = { workspace = true }
uv-small-str = { workspace = true }
uv-static = { workspace = true }
uv-version = { workspace = true }
uv-warnings = { workspace = true }
Expand Down
17 changes: 11 additions & 6 deletions crates/uv-client/src/flat_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use uv_cache_key::cache_digest;
use uv_distribution_filename::DistFilename;
use uv_distribution_types::{File, FileLocation, IndexUrl, UrlString};
use uv_pypi_types::HashDigests;
use uv_small_str::SmallString;

use crate::cached_client::{CacheControl, CachedClientError};
use crate::html::SimpleHtml;
Expand Down Expand Up @@ -186,10 +187,13 @@ impl<'a> FlatIndexClient<'a> {
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;

// Convert to a reference-counted string.
let base = SmallString::from(base.as_str());

let unarchived: Vec<File> = files
.into_iter()
.filter_map(|file| {
match File::try_from(file, base.as_url()) {
match File::try_from(file, &base) {
Ok(file) => Some(file),
Err(err) => {
// Ignore files with unparsable version specifiers.
Expand Down Expand Up @@ -270,10 +274,11 @@ impl<'a> FlatIndexClient<'a> {
}
}

let Ok(filename) = entry.file_name().into_string() else {
let filename = entry.file_name();
let Some(filename) = filename.to_str() else {
warn!(
"Skipping non-UTF-8 filename in `--find-links` directory: {}",
entry.file_name().to_string_lossy()
filename.to_string_lossy()
);
continue;
};
Expand All @@ -283,16 +288,16 @@ impl<'a> FlatIndexClient<'a> {

let file = File {
dist_info_metadata: false,
filename: filename.to_string(),
filename: filename.into(),
hashes: HashDigests::empty(),
requires_python: None,
size: None,
upload_time_utc_ms: None,
url: FileLocation::AbsoluteUrl(UrlString::from(url)),
url: FileLocation::AbsoluteUrl(UrlString::from(&url)),
yanked: None,
};

let Some(filename) = DistFilename::try_from_normalized_filename(&filename) else {
let Some(filename) = DistFilename::try_from_normalized_filename(filename) else {
debug!(
"Ignoring `--find-links` entry (expected a wheel or source distribution filename): {}",
entry.path().display()
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-client/src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ impl SimpleHtml {
yanked,
requires_python,
hashes,
filename: filename.to_string(),
url: decoded.to_string(),
filename: filename.into(),
url: decoded.into(),
size,
upload_time,
}))
Expand Down
22 changes: 12 additions & 10 deletions crates/uv-client/src/registry_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ use tokio::sync::Semaphore;
use tracing::{info_span, instrument, trace, warn, Instrument};
use url::Url;

use crate::base_client::{BaseClientBuilder, ExtraMiddleware};
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::rkyvutil::OwnedArchive;
use crate::{BaseClient, CachedClient, CachedClientError, Error, ErrorKind};
use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
use uv_configuration::KeyringProviderType;
use uv_configuration::{IndexStrategy, TrustedHost};
Expand All @@ -27,13 +33,7 @@ use uv_pep440::Version;
use uv_pep508::MarkerEnvironment;
use uv_platform_tags::Platform;
use uv_pypi_types::{ResolutionMetadata, SimpleJson};

use crate::base_client::{BaseClientBuilder, ExtraMiddleware};
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::rkyvutil::OwnedArchive;
use crate::{BaseClient, CachedClient, CachedClientError, Error, ErrorKind};
use uv_small_str::SmallString;

/// A builder for an [`RegistryClient`].
#[derive(Debug, Clone)]
Expand Down Expand Up @@ -894,10 +894,12 @@ impl SimpleMetadata {
fn from_files(files: Vec<uv_pypi_types::File>, package_name: &PackageName, base: &Url) -> Self {
let mut map: BTreeMap<Version, VersionFiles> = BTreeMap::default();

// Convert to a reference-counted string.
let base = SmallString::from(base.as_str());

// Group the distributions by version and kind
for file in files {
let Some(filename) =
DistFilename::try_from_filename(file.filename.as_str(), package_name)
let Some(filename) = DistFilename::try_from_filename(&file.filename, package_name)
else {
warn!("Skipping file for {package_name}: {}", file.filename);
continue;
Expand All @@ -906,7 +908,7 @@ impl SimpleMetadata {
DistFilename::SourceDistFilename(ref inner) => &inner.version,
DistFilename::WheelFilename(ref inner) => &inner.version,
};
let file = match File::try_from(file, base) {
let file = match File::try_from(file, &base) {
Ok(file) => file,
Err(err) => {
// Ignore files with unparsable version specifiers.
Expand Down
1 change: 1 addition & 0 deletions crates/uv-distribution-types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ uv-pep440 = { workspace = true }
uv-pep508 = { workspace = true }
uv-platform-tags = { workspace = true }
uv-pypi-types = { workspace = true }
uv-small-str = { workspace = true }

arcstr = { workspace = true }
bitflags = { workspace = true }
Expand Down
57 changes: 25 additions & 32 deletions crates/uv-distribution-types/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use url::Url;
use uv_pep440::{VersionSpecifiers, VersionSpecifiersParseError};
use uv_pep508::split_scheme;
use uv_pypi_types::{CoreMetadata, HashDigests, Yanked};
use uv_small_str::SmallString;

/// Error converting [`uv_pypi_types::File`] to [`distribution_type::File`].
#[derive(Debug, thiserror::Error)]
Expand All @@ -23,7 +24,7 @@ pub enum FileConversionError {
#[rkyv(derive(Debug))]
pub struct File {
pub dist_info_metadata: bool,
pub filename: String,
pub filename: SmallString,
pub hashes: HashDigests,
pub requires_python: Option<VersionSpecifiers>,
pub size: Option<u64>,
Expand All @@ -38,7 +39,10 @@ pub struct File {

impl File {
/// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers
pub fn try_from(file: uv_pypi_types::File, base: &Url) -> Result<Self, FileConversionError> {
pub fn try_from(
file: uv_pypi_types::File,
base: &SmallString,
) -> Result<Self, FileConversionError> {
Ok(Self {
dist_info_metadata: file
.core_metadata
Expand All @@ -56,7 +60,7 @@ impl File {
upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond),
url: match split_scheme(&file.url) {
Some(..) => FileLocation::AbsoluteUrl(UrlString::new(file.url)),
None => FileLocation::RelativeUrl(base.to_string(), file.url),
None => FileLocation::RelativeUrl(base.clone(), file.url),
},
yanked: file.yanked,
})
Expand All @@ -68,7 +72,7 @@ impl File {
#[rkyv(derive(Debug))]
pub enum FileLocation {
/// URL relative to the base URL.
RelativeUrl(String, String),
RelativeUrl(SmallString, SmallString),
/// Absolute URL.
AbsoluteUrl(UrlString),
}
Expand All @@ -89,30 +93,19 @@ impl FileLocation {
match *self {
FileLocation::RelativeUrl(ref base, ref path) => {
let base_url = Url::parse(base).map_err(|err| ToUrlError::InvalidBase {
base: base.clone(),
base: base.to_string(),
err,
})?;
let joined = base_url.join(path).map_err(|err| ToUrlError::InvalidJoin {
base: base.clone(),
path: path.clone(),
base: base.to_string(),
path: path.to_string(),
err,
})?;
Ok(joined)
}
FileLocation::AbsoluteUrl(ref absolute) => absolute.to_url(),
}
}

/// Convert this location to a URL.
///
/// This method is identical to [`FileLocation::to_url`] except it avoids parsing absolute URLs
/// as they are already guaranteed to be valid.
pub fn to_url_string(&self) -> Result<UrlString, ToUrlError> {
match *self {
FileLocation::AbsoluteUrl(ref absolute) => Ok(absolute.clone()),
FileLocation::RelativeUrl(_, _) => Ok(self.to_url()?.into()),
}
}
}

impl Display for FileLocation {
Expand Down Expand Up @@ -143,18 +136,18 @@ impl Display for FileLocation {
)]
#[serde(transparent)]
#[rkyv(derive(Debug))]
pub struct UrlString(String);
pub struct UrlString(SmallString);

impl UrlString {
/// Create a new [`UrlString`] from a [`String`].
pub fn new(url: String) -> Self {
pub fn new(url: SmallString) -> Self {
Self(url)
}

/// Converts a [`UrlString`] to a [`Url`].
pub fn to_url(&self) -> Result<Url, ToUrlError> {
Url::from_str(&self.0).map_err(|err| ToUrlError::InvalidAbsolute {
absolute: self.0.clone(),
absolute: self.0.to_string(),
err,
})
}
Expand All @@ -175,8 +168,8 @@ impl UrlString {
self.as_ref()
.split_once('#')
.map(|(path, _)| path)
.unwrap_or(self.as_ref())
.to_string(),
.map(SmallString::from)
.unwrap_or_else(|| self.0.clone()),
)
}
}
Expand All @@ -189,13 +182,13 @@ impl AsRef<str> for UrlString {

impl From<Url> for UrlString {
fn from(value: Url) -> Self {
UrlString(value.to_string())
Self(value.as_str().into())
}
}

impl From<&Url> for UrlString {
fn from(value: &Url) -> Self {
UrlString(value.to_string())
Self(value.as_str().into())
}
}

Expand Down Expand Up @@ -248,28 +241,28 @@ mod tests {

#[test]
fn base_str() {
let url = UrlString("https://example.com/path?query#fragment".to_string());
let url = UrlString("https://example.com/path?query#fragment".into());
assert_eq!(url.base_str(), "https://example.com/path");

let url = UrlString("https://example.com/path#fragment".to_string());
let url = UrlString("https://example.com/path#fragment".into());
assert_eq!(url.base_str(), "https://example.com/path");

let url = UrlString("https://example.com/path".to_string());
let url = UrlString("https://example.com/path".into());
assert_eq!(url.base_str(), "https://example.com/path");
}

#[test]
fn without_fragment() {
let url = UrlString("https://example.com/path?query#fragment".to_string());
let url = UrlString("https://example.com/path?query#fragment".into());
assert_eq!(
url.without_fragment(),
UrlString("https://example.com/path?query".to_string())
UrlString("https://example.com/path?query".into())
);

let url = UrlString("https://example.com/path#fragment".to_string());
let url = UrlString("https://example.com/path#fragment".into());
assert_eq!(url.base_str(), "https://example.com/path");

let url = UrlString("https://example.com/path".to_string());
let url = UrlString("https://example.com/path".into());
assert_eq!(url.base_str(), "https://example.com/path");
}
}
5 changes: 5 additions & 0 deletions crates/uv-pypi-types/src/base_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ impl BaseUrl {
pub fn as_url(&self) -> &Url {
&self.0
}

/// Return the underlying [`Url`] as a serialized string.
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}

impl From<Url> for BaseUrl {
Expand Down
6 changes: 4 additions & 2 deletions crates/uv-pypi-types/src/simple_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,12 @@ fn sorted_simple_json_files<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<File>
pub struct File {
// PEP 714-renamed field, followed by PEP 691-compliant field, followed by non-PEP 691-compliant
// alias used by PyPI.
//
// TODO(charlie): Use a single value here and move this into the deserializer, to save space.
pub core_metadata: Option<CoreMetadata>,
pub dist_info_metadata: Option<CoreMetadata>,
pub data_dist_info_metadata: Option<CoreMetadata>,
pub filename: String,
pub filename: SmallString,
pub hashes: Hashes,
/// There are a number of invalid specifiers on PyPI, so we first try to parse it into a
/// [`VersionSpecifiers`] according to spec (PEP 440), then a [`LenientVersionSpecifiers`] with
Expand All @@ -53,7 +55,7 @@ pub struct File {
pub requires_python: Option<Result<VersionSpecifiers, VersionSpecifiersParseError>>,
pub size: Option<u64>,
pub upload_time: Option<Timestamp>,
pub url: String,
pub url: SmallString,
pub yanked: Option<Box<Yanked>>,
}

Expand Down
1 change: 1 addition & 0 deletions crates/uv-resolver/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ uv-platform-tags = { workspace = true }
uv-pypi-types = { workspace = true }
uv-python = { workspace = true }
uv-requirements-txt = { workspace = true }
uv-small-str = { workspace = true }
uv-static = { workspace = true }
uv-types = { workspace = true }
uv-warnings = { workspace = true }
Expand Down
Loading

0 comments on commit 275db06

Please sign in to comment.