use hash instead of full wheel name in wheels cache
nkitsaini committed Feb 24, 2025
1 parent f9b638a · commit d49319c
Showing 8 changed files with 47 additions and 34 deletions.
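The theme across all eight files: entries in the `wheels` cache bucket were previously named after the full wheel stem (e.g., `typing_extensions-4.8.0-py3-none-any`), which grows with the wheel name; they are now named after a digest of that stem via `uv_cache_key::cache_digest`. A minimal sketch of the idea, using `DefaultHasher` purely for illustration (uv's real `cache_digest` uses its own stable hasher, so the actual digests differ):

    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    /// Illustrative stand-in for `uv_cache_key::cache_digest`: map an
    /// arbitrarily long wheel stem to a short, fixed-width, filesystem-safe
    /// name. (`DefaultHasher` is NOT stable across Rust releases; this is
    /// a demonstration, not uv's implementation.)
    fn digest_stem(stem: &str) -> String {
        let mut hasher = DefaultHasher::new();
        stem.hash(&mut hasher);
        format!("{:016x}", hasher.finish())
    }

    fn main() {
        let stem = "typing_extensions-4.8.0-py3-none-any";
        // Before: the entry name embedded the full stem and grew with it.
        println!("old: {stem}.msgpack");
        // After: a constant-length name, regardless of the stem's length.
        println!("new: {}.msgpack", digest_stem(stem));
    }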
20 changes: 14 additions & 6 deletions crates/uv-cache/src/lib.rs
@@ -565,9 +565,9 @@ impl Cache {
         let mut references = FxHashSet::default();

         for bucket in CacheBucket::iter() {
-            let bucket = self.bucket(bucket);
-            if bucket.is_dir() {
-                for entry in walkdir::WalkDir::new(bucket) {
+            let bucket_path = self.bucket(bucket);
+            if bucket_path.is_dir() {
+                for entry in walkdir::WalkDir::new(bucket_path) {
                     let entry = entry?;

                     // Ignore any `.lock` files.
@@ -579,17 +579,25 @@ impl Cache {
                     continue;
                 }

-                // Identify entries that match the wheel stem pattern (e.g., `typing-extensions-4.8.0-py3-none-any`).
                 let Some(filename) = entry
                     .path()
                     .file_name()
                     .and_then(|file_name| file_name.to_str())
                 else {
                     continue;
                 };
-                if WheelFilename::from_stem(filename).is_err() {
+
+                if bucket == CacheBucket::Wheels {
+                    // In the `wheels` bucket, the directory name is a hash, so we can't rely on the stem pattern.
+                    // Instead, skip any entry that has an extension (e.g., `.whl`, `.http`, `.rev`, and `.msgpack` files).
+                    if std::path::Path::new(filename).extension().is_some() {
+                        continue;
+                    }
+                } else if WheelFilename::from_stem(filename).is_err() {
+                    // For other buckets, only include entries that match the wheel stem pattern (e.g., `typing-extensions-4.8.0-py3-none-any`).
                     continue;
                 }

                 if let Ok(target) = self.resolve_link(entry.path()) {
                     references.insert(target);
                 }
@@ -1040,7 +1048,7 @@ impl CacheBucket {
             Self::Simple => "simple-v15",
             // Note that when bumping this, you'll also need to bump it
             // in `crates/uv/tests/it/cache_prune.rs`.
-            Self::Wheels => "wheels-v4",
+            Self::Wheels => "wheels-v5",
             // Note that when bumping this, you'll also need to bump
             // `ARCHIVE_VERSION` in `crates/uv-cache/src/lib.rs`.
             Self::Archive => "archive-v0",
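Since hash-named entries no longer parse as wheel stems, the prune walk tells the `wheels` bucket apart by extension instead. A standalone sketch of that filter (names are illustrative; `wheel_stem_parses` is a hypothetical stand-in for `WheelFilename::from_stem(..).is_ok()`):

    use std::path::Path;

    /// Hypothetical stand-in for `WheelFilename::from_stem(name).is_ok()`:
    /// a real wheel stem has name-version-python-abi-platform components.
    fn wheel_stem_parses(name: &str) -> bool {
        name.split('-').count() >= 5
    }

    /// Sketch of the pruning filter above: decide whether a directory
    /// entry may be a tracked archive link.
    fn is_candidate_link(in_wheels_bucket: bool, file_name: &str) -> bool {
        if in_wheels_bucket {
            // Hash-named links have no extension; pointer and artifact
            // files (`.whl`, `.http`, `.rev`, `.msgpack`) are skipped.
            Path::new(file_name).extension().is_none()
        } else {
            wheel_stem_parses(file_name)
        }
    }

    fn main() {
        assert!(is_candidate_link(true, "b7a3d8f2c1e0a9d4"));
        assert!(!is_candidate_link(true, "b7a3d8f2c1e0a9d4.http"));
        assert!(is_candidate_link(false, "anyio-4.0.0-py3-none-any"));
    }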
5 changes: 3 additions & 2 deletions crates/uv-client/src/registry_client.rs
@@ -15,6 +15,7 @@ use tracing::{info_span, instrument, trace, warn, Instrument};
 use url::Url;

 use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
+use uv_cache_key::cache_digest;
 use uv_configuration::KeyringProviderType;
 use uv_configuration::{IndexStrategy, TrustedHost};
 use uv_distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
@@ -618,7 +619,7 @@ impl RegistryClient {
         let cache_entry = self.cache.entry(
             CacheBucket::Wheels,
             WheelCache::Index(index).wheel_dir(filename.name.as_ref()),
-            format!("{}.msgpack", filename.stem()),
+            format!("{}.msgpack", cache_digest(&filename.stem())),
         );
         let cache_control = match self.connectivity {
             Connectivity::Online => CacheControl::from(
@@ -688,7 +689,7 @@ impl RegistryClient {
         let cache_entry = self.cache.entry(
             CacheBucket::Wheels,
             cache_shard.wheel_dir(filename.name.as_ref()),
-            format!("{}.msgpack", filename.stem()),
+            format!("{}.msgpack", cache_digest(&filename.stem())),
         );
         let cache_control = match self.connectivity {
             Connectivity::Online => CacheControl::from(
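The practical effect on the metadata pointer names, with an illustrative digest (the values shown are assumptions, not verbatim uv output):

    fn main() {
        // Hypothetical digest; the real value comes from `cache_digest`.
        let digest = "b7a3d8f2c1e0a9d4";

        // Before (wheels-v4): the pointer name embedded the wheel stem.
        let before = "typing_extensions-4.8.0-py3-none-any.msgpack";
        // After (wheels-v5): fixed-width regardless of wheel name length,
        // so very long stems no longer risk hitting filesystem name limits.
        let after = format!("{digest}.msgpack");

        assert!(after.len() < before.len());
        println!("{before} -> {after}");
    }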
6 changes: 5 additions & 1 deletion crates/uv-distribution/src/archive.rs
@@ -1,4 +1,5 @@
 use uv_cache::{ArchiveId, Cache, ARCHIVE_VERSION};
+use uv_distribution_filename::WheelFilename;
 use uv_distribution_types::Hashed;
 use uv_pypi_types::HashDigest;

@@ -9,16 +10,19 @@ pub struct Archive {
     pub id: ArchiveId,
     /// The computed hashes of the archive.
     pub hashes: Vec<HashDigest>,
+    /// The filename of the wheel.
+    pub filename: WheelFilename,
     /// The version of the archive bucket.
     pub version: u8,
 }

 impl Archive {
     /// Create a new [`Archive`] with the given ID and hashes.
-    pub(crate) fn new(id: ArchiveId, hashes: Vec<HashDigest>) -> Self {
+    pub(crate) fn new(id: ArchiveId, hashes: Vec<HashDigest>, filename: WheelFilename) -> Self {
         Self {
             id,
             hashes,
+            filename,
             version: ARCHIVE_VERSION,
         }
     }
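Storing the filename in `Archive` is the flip side of the digest change: once the pointer file on disk is hash-named, the wheel filename can only come from the pointer's payload. A simplified sketch of the new shape (uv's real fields use `ArchiveId`, `HashDigest`, and `WheelFilename`; plain `String`s here keep the example self-contained):

    /// Simplified sketch of the pointer payload after this commit.
    struct Archive {
        /// Unique ID of the entry in the archive bucket.
        id: String,
        /// The computed hashes of the archive.
        hashes: Vec<String>,
        /// The wheel filename, recorded here because it can no longer be
        /// recovered from the hash-named pointer file on disk.
        filename: String,
        /// The version of the archive bucket.
        version: u8,
    }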
25 changes: 16 additions & 9 deletions crates/uv-distribution/src/distribution_database.rs
@@ -16,6 +16,7 @@ use url::Url;

 use uv_cache::{ArchiveId, CacheBucket, CacheEntry, WheelCache};
 use uv_cache_info::{CacheInfo, Timestamp};
+use uv_cache_key::cache_digest;
 use uv_client::{
     CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
 };
};
Expand Down Expand Up @@ -193,7 +194,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
let wheel_entry = self.build_context.cache().entry(
CacheBucket::Wheels,
WheelCache::Index(&wheel.index).wheel_dir(wheel.name().as_ref()),
wheel.filename.stem(),
cache_digest(&wheel.filename.stem()),
);

// If the URL is a file URL, load the wheel directly.
@@ -266,7 +267,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
         let wheel_entry = self.build_context.cache().entry(
             CacheBucket::Wheels,
             WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
-            wheel.filename.stem(),
+            cache_digest(&wheel.filename.stem()),
         );

         // Download and unzip.
@@ -321,7 +322,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
         let cache_entry = self.build_context.cache().entry(
             CacheBucket::Wheels,
             WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
-            wheel.filename.stem(),
+            cache_digest(&wheel.filename.stem()),
         );

         self.load_wheel(
@@ -536,7 +537,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
         };

         // Create an entry for the HTTP cache.
-        let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem()));
+        let http_entry = wheel_entry.with_file(format!("{}.http", cache_digest(&filename.stem())));

         let download = |response: reqwest::Response| {
             async {
@@ -591,6 +592,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
                 Ok(Archive::new(
                     id,
                     hashers.into_iter().map(HashDigest::from).collect(),
+                    filename.clone(),
                 ))
             }
             .instrument(info_span!("wheel", wheel = %dist))
@@ -668,7 +670,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
         };

         // Create an entry for the HTTP cache.
-        let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem()));
+        let http_entry = wheel_entry.with_file(format!("{}.http", cache_digest(&filename.stem())));

         let download = |response: reqwest::Response| {
             async {
@@ -755,7 +757,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
                 reporter.on_download_complete(dist.name(), progress);
             }

-            Ok(Archive::new(id, hashes))
+            Ok(Archive::new(id, hashes, filename.clone()))
         }
         .instrument(info_span!("wheel", wheel = %dist))
     };
@@ -833,7 +835,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
         let modified = Timestamp::from_path(path).map_err(Error::CacheRead)?;

         // Attempt to read the archive pointer from the cache.
-        let pointer_entry = wheel_entry.with_file(format!("{}.rev", filename.stem()));
+        let pointer_entry =
+            wheel_entry.with_file(format!("{}.rev", cache_digest(&filename.stem())));
         let pointer = LocalArchivePointer::read_from(&pointer_entry)?;

         // Extract the archive from the pointer.
@@ -853,7 +856,11 @@
             })
         } else if hashes.is_none() {
             // Otherwise, unzip the wheel.
-            let archive = Archive::new(self.unzip_wheel(path, wheel_entry.path()).await?, vec![]);
+            let archive = Archive::new(
+                self.unzip_wheel(path, wheel_entry.path()).await?,
+                vec![],
+                filename.clone(),
+            );

             // Write the archive pointer to the cache.
             let pointer = LocalArchivePointer {
@@ -899,7 +906,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
             .map_err(Error::CacheWrite)?;

         // Create an archive.
-        let archive = Archive::new(id, hashes);
+        let archive = Archive::new(id, hashes, filename.clone());

         // Write the archive pointer to the cache.
         let pointer = LocalArchivePointer {
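All three pointer kinds in the `wheels` bucket now share the digest naming; only the extension distinguishes them, which is exactly what the prune filter above keys on. A tiny sketch (digest value illustrative):

    fn pointer_name(digest: &str, kind: &str) -> String {
        format!("{digest}.{kind}")
    }

    fn main() {
        let digest = "b7a3d8f2c1e0a9d4"; // hypothetical `cache_digest` output
        // `.msgpack`: cached registry metadata (registry_client.rs)
        // `.http`:    pointer for wheels downloaded over HTTP
        // `.rev`:     pointer for wheels loaded from the local filesystem
        for kind in ["msgpack", "http", "rev"] {
            println!("{}", pointer_name(digest, kind));
        }
    }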
14 changes: 3 additions & 11 deletions crates/uv-distribution/src/index/cached_wheel.rs
@@ -116,14 +116,10 @@ impl CachedWheel {
         }
     }

-    /// Read a cached wheel from a `.http` pointer (e.g., `anyio-4.0.0-py3-none-any.http`).
+    /// Read a cached wheel from a `.http` pointer.
     pub fn from_http_pointer(path: impl AsRef<Path>, cache: &Cache) -> Option<Self> {
         let path = path.as_ref();

-        // Determine the wheel filename.
-        let filename = path.file_stem()?.to_str()?;
-        let filename = WheelFilename::from_stem(filename).ok()?;
-
         // Read the pointer.
         let pointer = HttpArchivePointer::read_from(path).ok()??;
         let cache_info = pointer.to_cache_info();
@@ -140,7 +136,7 @@

         // Convert to a cached wheel.
         Some(Self {
-            filename,
+            filename: archive.filename,
             entry,
             hashes,
             cache_info,
@@ -151,10 +147,6 @@ impl CachedWheel {
     pub fn from_local_pointer(path: impl AsRef<Path>, cache: &Cache) -> Option<Self> {
         let path = path.as_ref();

-        // Determine the wheel filename.
-        let filename = path.file_stem()?.to_str()?;
-        let filename = WheelFilename::from_stem(filename).ok()?;
-
         // Read the pointer.
         let pointer = LocalArchivePointer::read_from(path).ok()??;
         let cache_info = pointer.to_cache_info();
@@ -170,7 +162,7 @@
         // Convert to a cached wheel.
         let entry = cache.entry(CacheBucket::Archive, "", id);
         Some(Self {
-            filename,
+            filename: archive.filename,
             entry,
             hashes,
             cache_info,
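The read path mirrors the write path: rather than parsing `path.file_stem()` back into a `WheelFilename` (impossible now that the stem on disk is a hash), the filename is taken from the deserialized archive. A self-contained sketch with simplified types (`read_archive` is a hypothetical stand-in for `HttpArchivePointer::read_from` / `LocalArchivePointer::read_from` plus deserialization):

    use std::path::Path;

    struct Archive {
        filename: String, // plus id, hashes, version in the real struct
    }

    struct CachedWheel {
        filename: String, // plus entry, hashes, cache_info in the real struct
    }

    /// Hypothetical stand-in for reading and deserializing a pointer file.
    fn read_archive(_path: &Path) -> Option<Archive> {
        Some(Archive {
            filename: "anyio-4.0.0-py3-none-any.whl".to_string(),
        })
    }

    fn from_pointer(path: &Path) -> Option<CachedWheel> {
        let archive = read_archive(path)?;
        // Before: WheelFilename::from_stem(path.file_stem()?.to_str()?).ok()?
        // After: the filename travels inside the pointer payload.
        Some(CachedWheel {
            filename: archive.filename,
        })
    }

    fn main() {
        let wheel = from_pointer(Path::new("b7a3d8f2c1e0a9d4.http")).unwrap();
        assert_eq!(wheel.filename, "anyio-4.0.0-py3-none-any.whl");
    }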
4 changes: 2 additions & 2 deletions crates/uv-distribution/src/source/built_wheel_metadata.rs
@@ -29,8 +29,8 @@ pub(crate) struct BuiltWheelMetadata {
 impl BuiltWheelMetadata {
     /// Find a compatible wheel in the cache.
     pub(crate) fn find_in_cache(tags: &Tags, cache_shard: &CacheShard) -> Option<Self> {
-        for directory in files(cache_shard) {
-            if let Some(metadata) = Self::from_path(directory, cache_shard) {
+        for file in files(cache_shard) {
+            if let Some(metadata) = Self::from_path(file, cache_shard) {
                 // Validate that the wheel is compatible with the target platform.
                 if metadata.filename.is_compatible(tags) {
                     return Some(metadata);
5 changes: 3 additions & 2 deletions crates/uv-installer/src/plan.rs
@@ -3,6 +3,7 @@ use tracing::{debug, warn};

 use uv_cache::{Cache, CacheBucket, WheelCache};
 use uv_cache_info::Timestamp;
+use uv_cache_key::cache_digest;
 use uv_configuration::{BuildOptions, ConfigSettings, Reinstall};
 use uv_distribution::{
     BuiltWheelIndex, HttpArchivePointer, LocalArchivePointer, RegistryWheelIndex,
@@ -158,7 +159,7 @@ impl<'a> Planner<'a> {
                     CacheBucket::Wheels,
                     WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
                 )
-                .entry(format!("{}.http", wheel.filename.stem()));
+                .entry(format!("{}.http", cache_digest(&wheel.filename.stem())));

                 // Read the HTTP pointer.
                 if let Some(pointer) = HttpArchivePointer::read_from(&cache_entry)? {
@@ -209,7 +210,7 @@ impl<'a> Planner<'a> {
                     CacheBucket::Wheels,
                     WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
                 )
-                .entry(format!("{}.rev", wheel.filename.stem()));
+                .entry(format!("{}.rev", cache_digest(&wheel.filename.stem())));

                 if let Some(pointer) = LocalArchivePointer::read_from(&cache_entry)? {
                     let timestamp = Timestamp::from_path(&wheel.install_path)?;
2 changes: 1 addition & 1 deletion crates/uv/tests/it/cache_prune.rs
@@ -158,7 +158,7 @@ fn prune_stale_symlink() -> Result<()> {
         .success();

     // Remove the wheels directory, causing the symlink to become stale.
-    let wheels = context.cache_dir.child("wheels-v4");
+    let wheels = context.cache_dir.child("wheels-v5");
     fs_err::remove_dir_all(wheels)?;

     let filters: Vec<_> = context
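The test tracks the bucket bump because bucket directories are versioned by name: `wheels-v4` and `wheels-v5` are disjoint paths, so bumping the suffix effectively invalidates every entry written under the old naming scheme without any migration. An illustrative sketch of the versioned lookup (only the constant value comes from the diff; the helper and paths are assumptions):

    use std::path::{Path, PathBuf};

    // Bumped from "wheels-v4" in this commit; any stale `wheels-v4`
    // directory left behind is simply never read again.
    const WHEELS_BUCKET: &str = "wheels-v5";

    fn wheels_dir(cache_root: &Path) -> PathBuf {
        cache_root.join(WHEELS_BUCKET)
    }

    fn main() {
        println!("{}", wheels_dir(Path::new("/home/user/.cache/uv")).display());
    }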
