From 4f80b5e9a84f9a1a60b5b51f22db6740e3fee954 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 4 Oct 2024 11:40:02 +0200 Subject: [PATCH 1/6] worker/jobs: Rename `git` module to `index` This also contains the `SyncToSparseIndex` job, which does not use git at all... --- src/worker/jobs/{git.rs => index.rs} | 0 src/worker/jobs/mod.rs | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename src/worker/jobs/{git.rs => index.rs} (100%) diff --git a/src/worker/jobs/git.rs b/src/worker/jobs/index.rs similarity index 100% rename from src/worker/jobs/git.rs rename to src/worker/jobs/index.rs diff --git a/src/worker/jobs/mod.rs b/src/worker/jobs/mod.rs index 36fd5042a9..2ba1a34f81 100644 --- a/src/worker/jobs/mod.rs +++ b/src/worker/jobs/mod.rs @@ -12,7 +12,7 @@ mod delete_crate; mod downloads; pub mod dump_db; mod expiry_notification; -mod git; +mod index; mod index_version_downloads_archive; mod readmes; pub mod rss; @@ -29,7 +29,7 @@ pub use self::downloads::{ }; pub use self::dump_db::DumpDb; pub use self::expiry_notification::SendTokenExpiryNotifications; -pub use self::git::{NormalizeIndex, SquashIndex, SyncToGitIndex, SyncToSparseIndex}; +pub use self::index::{NormalizeIndex, SquashIndex, SyncToGitIndex, SyncToSparseIndex}; pub use self::index_version_downloads_archive::IndexVersionDownloadsArchive; pub use self::readmes::RenderAndUploadReadme; pub use self::send_publish_notifications::SendPublishNotificationsJob; From 6caabd9ddfeaea23508b37b6510861efbc07f6b6 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 4 Oct 2024 11:40:35 +0200 Subject: [PATCH 2/6] worker/jobs/index: Convert to directory module --- src/worker/jobs/{index.rs => index/mod.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/worker/jobs/{index.rs => index/mod.rs} (100%) diff --git a/src/worker/jobs/index.rs b/src/worker/jobs/index/mod.rs similarity index 100% rename from src/worker/jobs/index.rs rename to src/worker/jobs/index/mod.rs From 
21b91229bfbe87aa53f7e55aac19fa642d22ba4c Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 4 Oct 2024 11:42:49 +0200 Subject: [PATCH 3/6] worker/jobs/index: Extract `NormalizeIndex` into dedicated module --- src/worker/jobs/index/mod.rs | 99 ++---------------------------- src/worker/jobs/index/normalize.rs | 99 ++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 93 deletions(-) create mode 100644 src/worker/jobs/index/normalize.rs diff --git a/src/worker/jobs/index/mod.rs b/src/worker/jobs/index/mod.rs index 9fe7a46d82..3d60f1800b 100644 --- a/src/worker/jobs/index/mod.rs +++ b/src/worker/jobs/index/mod.rs @@ -5,17 +5,21 @@ use crate::worker::Environment; use anyhow::Context; use chrono::Utc; use crates_io_env_vars::var_parsed; -use crates_io_index::{Crate, Repository}; +use crates_io_index::Repository; use crates_io_worker::BackgroundJob; use diesel::prelude::*; use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; use sentry::Level; use std::fs::{self, File}; -use std::io::{BufRead, BufReader, ErrorKind, Write}; +use std::io::{ErrorKind, Write}; use std::process::Command; use std::sync::Arc; use url::Url; +mod normalize; + +pub use normalize::NormalizeIndex; + #[derive(Serialize, Deserialize)] pub struct SyncToGitIndex { krate: String, @@ -221,94 +225,3 @@ impl BackgroundJob for SquashIndex { .await } } - -#[derive(Serialize, Deserialize)] -pub struct NormalizeIndex { - dry_run: bool, -} - -impl NormalizeIndex { - pub fn new(dry_run: bool) -> Self { - Self { dry_run } - } -} - -impl BackgroundJob for NormalizeIndex { - const JOB_NAME: &'static str = "normalize_index"; - const QUEUE: &'static str = "repository"; - - type Context = Arc<Environment>; - - async fn run(&self, env: Self::Context) -> anyhow::Result<()> { - info!("Normalizing the index"); - - let dry_run = self.dry_run; - spawn_blocking(move || { - let repo = env.lock_index()?; - - let files = repo.get_files_modified_since(None)?; - let num_files = files.len(); - - for (i, file) in 
files.iter().enumerate() { - if i % 50 == 0 { - info!(num_files, i, ?file); - } - - let crate_name = file.file_name().unwrap().to_str().unwrap(); - let path = repo.index_file(crate_name); - if !path.exists() { - continue; - } - - let mut body: Vec<u8> = Vec::new(); - let file = fs::File::open(&path)?; - let reader = BufReader::new(file); - let mut versions = Vec::new(); - for line in reader.lines() { - let line = line?; - if line.is_empty() { - continue; - } - - let mut krate: Crate = serde_json::from_str(&line)?; - for dep in &mut krate.deps { - // Remove deps with empty features - dep.features.retain(|d| !d.is_empty()); - // Set null DependencyKind to Normal - dep.kind = - Some(dep.kind.unwrap_or(crates_io_index::DependencyKind::Normal)); - } - krate.deps.sort(); - versions.push(krate); - } - for version in versions { - serde_json::to_writer(&mut body, &version).unwrap(); - body.push(b'\n'); - } - fs::write(path, body)?; - } - - info!("Committing normalization"); - let msg = "Normalize index format\n\n\ - More information can be found at https://github.com/rust-lang/crates.io/pull/5066"; - repo.run_command(Command::new("git").args(["commit", "-am", msg]))?; - - let branch = match dry_run { - false => "master", - true => "normalization-dry-run", - }; - - info!(?branch, "Pushing to upstream repository"); - repo.run_command(Command::new("git").args([ - "push", - "origin", - &format!("HEAD:{branch}"), - ]))?; - - info!("Index normalization completed"); - - Ok(()) - }) - .await - } -} diff --git a/src/worker/jobs/index/normalize.rs b/src/worker/jobs/index/normalize.rs new file mode 100644 index 0000000000..7f9cba8c9a --- /dev/null +++ b/src/worker/jobs/index/normalize.rs @@ -0,0 +1,99 @@ +use crate::tasks::spawn_blocking; +use crate::worker::Environment; +use crates_io_index::Crate; +use crates_io_worker::BackgroundJob; +use std::fs; +use std::io::{BufRead, BufReader}; +use std::process::Command; +use std::sync::Arc; + +#[derive(Serialize, Deserialize)] +pub struct 
NormalizeIndex { + dry_run: bool, +} + +impl NormalizeIndex { + pub fn new(dry_run: bool) -> Self { + Self { dry_run } + } +} + +impl BackgroundJob for NormalizeIndex { + const JOB_NAME: &'static str = "normalize_index"; + const QUEUE: &'static str = "repository"; + + type Context = Arc<Environment>; + + async fn run(&self, env: Self::Context) -> anyhow::Result<()> { + info!("Normalizing the index"); + + let dry_run = self.dry_run; + spawn_blocking(move || { + let repo = env.lock_index()?; + + let files = repo.get_files_modified_since(None)?; + let num_files = files.len(); + + for (i, file) in files.iter().enumerate() { + if i % 50 == 0 { + info!(num_files, i, ?file); + } + + let crate_name = file.file_name().unwrap().to_str().unwrap(); + let path = repo.index_file(crate_name); + if !path.exists() { + continue; + } + + let mut body: Vec<u8> = Vec::new(); + let file = fs::File::open(&path)?; + let reader = BufReader::new(file); + let mut versions = Vec::new(); + for line in reader.lines() { + let line = line?; + if line.is_empty() { + continue; + } + + let mut krate: Crate = serde_json::from_str(&line)?; + for dep in &mut krate.deps { + // Remove deps with empty features + dep.features.retain(|d| !d.is_empty()); + // Set null DependencyKind to Normal + dep.kind = + Some(dep.kind.unwrap_or(crates_io_index::DependencyKind::Normal)); + } + krate.deps.sort(); + versions.push(krate); + } + for version in versions { + serde_json::to_writer(&mut body, &version).unwrap(); + body.push(b'\n'); + } + fs::write(path, body)?; + } + + info!("Committing normalization"); + let msg = "Normalize index format\n\n\ + More information can be found at https://github.com/rust-lang/crates.io/pull/5066"; + repo.run_command(Command::new("git").args(["commit", "-am", msg]))?; + + let branch = match dry_run { + false => "master", + true => "normalization-dry-run", + }; + + info!(?branch, "Pushing to upstream repository"); + repo.run_command(Command::new("git").args([ + "push", + "origin", + 
&format!("HEAD:{branch}"), + ]))?; + + info!("Index normalization completed"); + + Ok(()) + }) + .await + } +} From 77befd4272f3d154c3492dc17067d02debdaa570 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 4 Oct 2024 11:43:51 +0200 Subject: [PATCH 4/6] worker/jobs/index: Extract `SquashIndex` into dedicated module --- src/worker/jobs/index/mod.rs | 64 +------------------------------- src/worker/jobs/index/squash.rs | 66 +++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 62 deletions(-) create mode 100644 src/worker/jobs/index/squash.rs diff --git a/src/worker/jobs/index/mod.rs b/src/worker/jobs/index/mod.rs index 3d60f1800b..bb6322d1f8 100644 --- a/src/worker/jobs/index/mod.rs +++ b/src/worker/jobs/index/mod.rs @@ -3,8 +3,6 @@ use crate::tasks::spawn_blocking; use crate::util::diesel::Conn; use crate::worker::Environment; use anyhow::Context; -use chrono::Utc; -use crates_io_env_vars::var_parsed; use crates_io_index::Repository; use crates_io_worker::BackgroundJob; use diesel::prelude::*; @@ -12,13 +10,13 @@ use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; use sentry::Level; use std::fs::{self, File}; use std::io::{ErrorKind, Write}; -use std::process::Command; use std::sync::Arc; -use url::Url; mod normalize; +mod squash; pub use normalize::NormalizeIndex; +pub use squash::SquashIndex; #[derive(Serialize, Deserialize)] pub struct SyncToGitIndex { @@ -167,61 +165,3 @@ pub fn get_index_data(name: &str, conn: &mut impl Conn) -> anyhow::Result