diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs
index b7f15e714c7f..a704af56207c 100644
--- a/crates/uv-cache/src/lib.rs
+++ b/crates/uv-cache/src/lib.rs
@@ -565,9 +565,9 @@ impl Cache {
         let mut references = FxHashSet::default();
 
         for bucket in CacheBucket::iter() {
-            let bucket = self.bucket(bucket);
-            if bucket.is_dir() {
-                for entry in walkdir::WalkDir::new(bucket) {
+            let bucket_path = self.bucket(bucket);
+            if bucket_path.is_dir() {
+                for entry in walkdir::WalkDir::new(bucket_path) {
                     let entry = entry?;
 
                     // Ignore any `.lock` files.
@@ -579,7 +579,6 @@ impl Cache {
                         continue;
                     }
 
-                    // Identify entries that match the wheel stem pattern (e.g., `typing-extensions-4.8.0-py3-none-any`).
                     let Some(filename) = entry
                         .path()
                         .file_name()
@@ -587,9 +586,18 @@ impl Cache {
                     else {
                         continue;
                     };
-                    if WheelFilename::from_stem(filename).is_err() {
+
+                    if bucket == CacheBucket::Wheels {
+                        // In the `wheels` bucket, a hash is used for the directory name, so we can't rely on the
+                        // stem pattern. Instead, skip any entry with an extension (e.g., `.whl`, `.http`, `.rev`, or `.msgpack` files).
+                        if std::path::Path::new(filename).extension().is_some() {
+                            continue;
+                        }
+                    } else if WheelFilename::from_stem(filename).is_err() {
+                        // For other buckets, only include entries that match the wheel stem pattern (e.g., `typing-extensions-4.8.0-py3-none-any`).
                         continue;
                     }
+
                     if let Ok(target) = self.resolve_link(entry.path()) {
                         references.insert(target);
                     }
@@ -1040,7 +1048,7 @@ impl CacheBucket {
             Self::Simple => "simple-v15",
             // Note that when bumping this, you'll also need to bump it
             // in `crates/uv/tests/it/cache_prune.rs`.
-            Self::Wheels => "wheels-v4",
+            Self::Wheels => "wheels-v5",
             // Note that when bumping this, you'll also need to bump
             // `ARCHIVE_VERSION` in `crates/uv-cache/src/lib.rs`.
             Self::Archive => "archive-v0",
diff --git a/crates/uv-client/src/registry_client.rs b/crates/uv-client/src/registry_client.rs
index 1dc2688648a0..103a051619fd 100644
--- a/crates/uv-client/src/registry_client.rs
+++ b/crates/uv-client/src/registry_client.rs
@@ -15,6 +15,7 @@
 use tracing::{info_span, instrument, trace, warn, Instrument};
 use url::Url;
 use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
+use uv_cache_key::cache_digest;
 use uv_configuration::KeyringProviderType;
 use uv_configuration::{IndexStrategy, TrustedHost};
 use uv_distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
@@ -618,7 +619,7 @@
         let cache_entry = self.cache.entry(
             CacheBucket::Wheels,
             WheelCache::Index(index).wheel_dir(filename.name.as_ref()),
-            format!("{}.msgpack", filename.stem()),
+            format!("{}.msgpack", cache_digest(&filename.stem())),
         );
         let cache_control = match self.connectivity {
             Connectivity::Online => CacheControl::from(
@@ -688,7 +689,7 @@
         let cache_entry = self.cache.entry(
             CacheBucket::Wheels,
             cache_shard.wheel_dir(filename.name.as_ref()),
-            format!("{}.msgpack", filename.stem()),
+            format!("{}.msgpack", cache_digest(&filename.stem())),
         );
         let cache_control = match self.connectivity {
             Connectivity::Online => CacheControl::from(
diff --git a/crates/uv-distribution/src/archive.rs b/crates/uv-distribution/src/archive.rs
index 9a1dbb86834b..8567362f0be8 100644
--- a/crates/uv-distribution/src/archive.rs
+++ b/crates/uv-distribution/src/archive.rs
@@ -1,4 +1,5 @@
 use uv_cache::{ArchiveId, Cache, ARCHIVE_VERSION};
+use uv_distribution_filename::WheelFilename;
 use uv_distribution_types::Hashed;
 use uv_pypi_types::HashDigest;
 
@@ -9,16 +10,19 @@ pub struct Archive {
     pub id: ArchiveId,
     /// The computed hashes of the archive.
     pub hashes: Vec<HashDigest>,
+    /// The filename of the wheel.
+    pub filename: WheelFilename,
     /// The version of the archive bucket.
     pub version: u8,
 }
 
 impl Archive {
     /// Create a new [`Archive`] with the given ID and hashes.
-    pub(crate) fn new(id: ArchiveId, hashes: Vec<HashDigest>) -> Self {
+    pub(crate) fn new(id: ArchiveId, hashes: Vec<HashDigest>, filename: WheelFilename) -> Self {
         Self {
             id,
             hashes,
+            filename,
             version: ARCHIVE_VERSION,
         }
     }
diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs
index 3c3ef575f49a..3f4f6306aa89 100644
--- a/crates/uv-distribution/src/distribution_database.rs
+++ b/crates/uv-distribution/src/distribution_database.rs
@@ -16,6 +16,7 @@
 use url::Url;
 use uv_cache::{ArchiveId, CacheBucket, CacheEntry, WheelCache};
 use uv_cache_info::{CacheInfo, Timestamp};
+use uv_cache_key::cache_digest;
 use uv_client::{
     CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
 };
@@ -193,7 +194,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
                 let wheel_entry = self.build_context.cache().entry(
                     CacheBucket::Wheels,
                     WheelCache::Index(&wheel.index).wheel_dir(wheel.name().as_ref()),
-                    wheel.filename.stem(),
+                    cache_digest(&wheel.filename.stem()),
                 );
 
                 // If the URL is a file URL, load the wheel directly.
@@ -266,7 +267,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
                 let wheel_entry = self.build_context.cache().entry(
                     CacheBucket::Wheels,
                     WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
-                    wheel.filename.stem(),
+                    cache_digest(&wheel.filename.stem()),
                 );
 
                 // Download and unzip.
@@ -321,7 +322,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
                 let cache_entry = self.build_context.cache().entry(
                     CacheBucket::Wheels,
                     WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
-                    wheel.filename.stem(),
+                    cache_digest(&wheel.filename.stem()),
                 );
 
                 self.load_wheel(
@@ -536,7 +537,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
         };
 
         // Create an entry for the HTTP cache.
-        let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem()));
+        let http_entry = wheel_entry.with_file(format!("{}.http", cache_digest(&filename.stem())));
 
         let download = |response: reqwest::Response| {
             async {
@@ -591,6 +592,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
                 Ok(Archive::new(
                     id,
                     hashers.into_iter().map(HashDigest::from).collect(),
+                    filename.clone(),
                 ))
             }
             .instrument(info_span!("wheel", wheel = %dist))
@@ -668,7 +670,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
         };
 
         // Create an entry for the HTTP cache.
-        let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem()));
+        let http_entry = wheel_entry.with_file(format!("{}.http", cache_digest(&filename.stem())));
 
         let download = |response: reqwest::Response| {
             async {
@@ -755,7 +757,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
                     reporter.on_download_complete(dist.name(), progress);
                 }
 
-                Ok(Archive::new(id, hashes))
+                Ok(Archive::new(id, hashes, filename.clone()))
             }
             .instrument(info_span!("wheel", wheel = %dist))
         };
@@ -833,7 +835,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
         let modified = Timestamp::from_path(path).map_err(Error::CacheRead)?;
 
         // Attempt to read the archive pointer from the cache.
-        let pointer_entry = wheel_entry.with_file(format!("{}.rev", filename.stem()));
+        let pointer_entry =
+            wheel_entry.with_file(format!("{}.rev", cache_digest(&filename.stem())));
         let pointer = LocalArchivePointer::read_from(&pointer_entry)?;
 
         // Extract the archive from the pointer.
@@ -853,7 +856,11 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
             })
         } else if hashes.is_none() {
             // Otherwise, unzip the wheel.
-            let archive = Archive::new(self.unzip_wheel(path, wheel_entry.path()).await?, vec![]);
+            let archive = Archive::new(
+                self.unzip_wheel(path, wheel_entry.path()).await?,
+                vec![],
+                filename.clone(),
+            );
 
             // Write the archive pointer to the cache.
             let pointer = LocalArchivePointer {
@@ -899,7 +906,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
             .map_err(Error::CacheWrite)?;
 
         // Create an archive.
-        let archive = Archive::new(id, hashes);
+        let archive = Archive::new(id, hashes, filename.clone());
 
         // Write the archive pointer to the cache.
         let pointer = LocalArchivePointer {
diff --git a/crates/uv-distribution/src/index/cached_wheel.rs b/crates/uv-distribution/src/index/cached_wheel.rs
index cacdbf3d9a3e..ffca47531c7f 100644
--- a/crates/uv-distribution/src/index/cached_wheel.rs
+++ b/crates/uv-distribution/src/index/cached_wheel.rs
@@ -116,14 +116,10 @@ impl CachedWheel {
         }
     }
 
-    /// Read a cached wheel from a `.http` pointer (e.g., `anyio-4.0.0-py3-none-any.http`).
+    /// Read a cached wheel from a `.http` pointer
     pub fn from_http_pointer(path: impl AsRef<Path>, cache: &Cache) -> Option<Self> {
         let path = path.as_ref();
 
-        // Determine the wheel filename.
-        let filename = path.file_stem()?.to_str()?;
-        let filename = WheelFilename::from_stem(filename).ok()?;
-
         // Read the pointer.
         let pointer = HttpArchivePointer::read_from(path).ok()??;
         let cache_info = pointer.to_cache_info();
@@ -140,7 +136,7 @@
 
         // Convert to a cached wheel.
         Some(Self {
-            filename,
+            filename: archive.filename,
             entry,
             hashes,
             cache_info,
@@ -151,10 +147,6 @@
     pub fn from_local_pointer(path: impl AsRef<Path>, cache: &Cache) -> Option<Self> {
         let path = path.as_ref();
 
-        // Determine the wheel filename.
-        let filename = path.file_stem()?.to_str()?;
-        let filename = WheelFilename::from_stem(filename).ok()?;
-
         // Read the pointer.
         let pointer = LocalArchivePointer::read_from(path).ok()??;
         let cache_info = pointer.to_cache_info();
@@ -170,7 +162,7 @@
         // Convert to a cached wheel.
         let entry = cache.entry(CacheBucket::Archive, "", id);
         Some(Self {
-            filename,
+            filename: archive.filename,
             entry,
             hashes,
             cache_info,
diff --git a/crates/uv-distribution/src/source/built_wheel_metadata.rs b/crates/uv-distribution/src/source/built_wheel_metadata.rs
index 09e62d78ea68..4f5d001b9489 100644
--- a/crates/uv-distribution/src/source/built_wheel_metadata.rs
+++ b/crates/uv-distribution/src/source/built_wheel_metadata.rs
@@ -29,8 +29,8 @@ pub(crate) struct BuiltWheelMetadata {
 impl BuiltWheelMetadata {
     /// Find a compatible wheel in the cache.
     pub(crate) fn find_in_cache(tags: &Tags, cache_shard: &CacheShard) -> Option<Self> {
-        for directory in files(cache_shard) {
-            if let Some(metadata) = Self::from_path(directory, cache_shard) {
+        for file in files(cache_shard) {
+            if let Some(metadata) = Self::from_path(file, cache_shard) {
                 // Validate that the wheel is compatible with the target platform.
                 if metadata.filename.is_compatible(tags) {
                     return Some(metadata);
diff --git a/crates/uv-installer/src/plan.rs b/crates/uv-installer/src/plan.rs
index b0a26b9bf33e..d5e019fbcdcf 100644
--- a/crates/uv-installer/src/plan.rs
+++ b/crates/uv-installer/src/plan.rs
@@ -3,6 +3,7 @@
 use tracing::{debug, warn};
 use uv_cache::{Cache, CacheBucket, WheelCache};
 use uv_cache_info::Timestamp;
+use uv_cache_key::cache_digest;
 use uv_configuration::{BuildOptions, ConfigSettings, Reinstall};
 use uv_distribution::{
     BuiltWheelIndex, HttpArchivePointer, LocalArchivePointer, RegistryWheelIndex,
@@ -158,7 +159,7 @@ impl<'a> Planner<'a> {
                             CacheBucket::Wheels,
                             WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
                         )
-                        .entry(format!("{}.http", wheel.filename.stem()));
+                        .entry(format!("{}.http", cache_digest(&wheel.filename.stem())));
 
                     // Read the HTTP pointer.
                     if let Some(pointer) = HttpArchivePointer::read_from(&cache_entry)? {
@@ -209,7 +210,7 @@ impl<'a> Planner<'a> {
                             CacheBucket::Wheels,
                             WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
                         )
-                        .entry(format!("{}.rev", wheel.filename.stem()));
+                        .entry(format!("{}.rev", cache_digest(&wheel.filename.stem())));
 
                     if let Some(pointer) = LocalArchivePointer::read_from(&cache_entry)? {
                         let timestamp = Timestamp::from_path(&wheel.install_path)?;
diff --git a/crates/uv/tests/it/cache_prune.rs b/crates/uv/tests/it/cache_prune.rs
index bb4bd876bbdb..43e52d1d3920 100644
--- a/crates/uv/tests/it/cache_prune.rs
+++ b/crates/uv/tests/it/cache_prune.rs
@@ -158,7 +158,7 @@ fn prune_stale_symlink() -> Result<()> {
         .success();
 
     // Remove the wheels directory, causing the symlink to become stale.
-    let wheels = context.cache_dir.child("wheels-v4");
+    let wheels = context.cache_dir.child("wheels-v5");
     fs_err::remove_dir_all(wheels)?;
 
     let filters: Vec<_> = context