Skip to content

Commit 5703037

Browse files
committed
worker: Add NormalizeIndex background job
1 parent 09db905 commit 5703037

File tree

3 files changed

+89
-3
lines changed

3 files changed

+89
-3
lines changed

src/background_jobs.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ pub enum Job {
1818
IndexSquash(IndexSquashJob),
1919
IndexSyncToHttp(IndexSyncToHttpJob),
2020
IndexUpdateYanked(IndexUpdateYankedJob),
21+
NormalizeIndex(NormalizeIndexJob),
2122
RenderAndUploadReadme(RenderAndUploadReadmeJob),
2223
UpdateDownloads(UpdateDownloadsJob),
2324
}
@@ -29,6 +30,7 @@ impl Job {
2930
const INDEX_SQUASH: &str = "squash_index";
3031
const INDEX_SYNC_TO_HTTP: &str = "update_crate_index";
3132
const INDEX_UPDATE_YANKED: &str = "sync_yanked";
33+
const NORMALIZE_INDEX: &str = "normalize_index";
3234
const RENDER_AND_UPLOAD_README: &str = "render_and_upload_readme";
3335
const UPDATE_DOWNLOADS: &str = "update_downloads";
3436

@@ -40,6 +42,7 @@ impl Job {
4042
Job::IndexSquash(_) => Self::INDEX_SQUASH,
4143
Job::IndexSyncToHttp(_) => Self::INDEX_SYNC_TO_HTTP,
4244
Job::IndexUpdateYanked(_) => Self::INDEX_UPDATE_YANKED,
45+
Job::NormalizeIndex(_) => Self::NORMALIZE_INDEX,
4346
Job::RenderAndUploadReadme(_) => Self::RENDER_AND_UPLOAD_README,
4447
Job::UpdateDownloads(_) => Self::UPDATE_DOWNLOADS,
4548
}
@@ -53,6 +56,7 @@ impl Job {
5356
Job::IndexSquash(inner) => serde_json::to_value(inner),
5457
Job::IndexSyncToHttp(inner) => serde_json::to_value(inner),
5558
Job::IndexUpdateYanked(inner) => serde_json::to_value(inner),
59+
Job::NormalizeIndex(inner) => serde_json::to_value(inner),
5660
Job::RenderAndUploadReadme(inner) => serde_json::to_value(inner),
5761
Job::UpdateDownloads(inner) => serde_json::to_value(inner),
5862
}
@@ -80,6 +84,7 @@ impl Job {
8084
Self::INDEX_SQUASH => Job::IndexSquash(from_value(value)?),
8185
Self::INDEX_SYNC_TO_HTTP => Job::IndexSyncToHttp(from_value(value)?),
8286
Self::INDEX_UPDATE_YANKED => Job::IndexUpdateYanked(from_value(value)?),
87+
Self::NORMALIZE_INDEX => Job::NormalizeIndex(from_value(value)?),
8388
Self::RENDER_AND_UPLOAD_README => Job::RenderAndUploadReadme(from_value(value)?),
8489
Self::UPDATE_DOWNLOADS => Job::UpdateDownloads(from_value(value)?),
8590
job_type => Err(PerformError::from(format!("Unknown job type {job_type}")))?,
@@ -106,6 +111,7 @@ impl Job {
106111
Job::IndexUpdateYanked(args) => conn.with_connection(&|conn| {
107112
worker::perform_index_update_yanked(env, conn, &args.krate, &args.version_num)
108113
}),
114+
Job::NormalizeIndex(args) => worker::perform_normalize_index(env, args),
109115
Job::RenderAndUploadReadme(args) => conn.with_connection(&|conn| {
110116
worker::perform_render_and_upload_readme(
111117
conn,
@@ -150,6 +156,11 @@ pub struct IndexUpdateYankedJob {
150156
pub(super) version_num: String,
151157
}
152158

159+
#[derive(Serialize, Deserialize)]
160+
pub struct NormalizeIndexJob {
161+
pub dry_run: bool,
162+
}
163+
153164
#[derive(Serialize, Deserialize)]
154165
pub struct RenderAndUploadReadmeJob {
155166
pub(super) version_id: i32,

src/worker/git.rs

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::background_jobs::{
22
Environment, IndexAddCrateJob, IndexSquashJob, IndexSyncToHttpJob, IndexUpdateYankedJob, Job,
3+
NormalizeIndexJob,
34
};
45
use crate::schema;
56
use crate::swirl::PerformError;
@@ -8,7 +9,7 @@ use cargo_registry_index::{Crate, Repository};
89
use chrono::Utc;
910
use diesel::prelude::*;
1011
use std::fs::{self, OpenOptions};
11-
use std::io::ErrorKind;
12+
use std::io::{BufRead, BufReader, ErrorKind};
1213
use std::process::Command;
1314

1415
#[instrument(skip_all, fields(krate.name = ?krate.name, krate.vers = ?krate.vers))]
@@ -178,3 +179,77 @@ pub fn perform_index_squash(env: &Environment) -> Result<(), PerformError> {
178179
pub fn squash_index() -> Job {
179180
Job::IndexSquash(IndexSquashJob {})
180181
}
182+
183+
pub fn perform_normalize_index(
184+
env: &Environment,
185+
args: NormalizeIndexJob,
186+
) -> Result<(), PerformError> {
187+
info!("Normalizing the index");
188+
189+
let repo = env.lock_index()?;
190+
191+
let files = repo.get_files_modified_since(None)?;
192+
let num_files = files.len();
193+
194+
for (i, file) in files.iter().enumerate() {
195+
if i % 50 == 0 {
196+
info!(num_files, i, ?file);
197+
}
198+
199+
let crate_name = file.file_name().unwrap().to_str().unwrap();
200+
let path = repo.index_file(crate_name);
201+
if !path.exists() {
202+
continue;
203+
}
204+
205+
let mut body: Vec<u8> = Vec::new();
206+
let file = fs::File::open(&path)?;
207+
let reader = BufReader::new(file);
208+
let mut versions = Vec::new();
209+
for line in reader.lines() {
210+
let line = line?;
211+
if line.is_empty() {
212+
continue;
213+
}
214+
215+
let mut krate: Crate = serde_json::from_str(&line)?;
216+
for dep in &mut krate.deps {
217+
// Remove deps with empty features
218+
dep.features.retain(|d| !d.is_empty());
219+
// Set null DependencyKind to Normal
220+
dep.kind = Some(
221+
dep.kind
222+
.unwrap_or(cargo_registry_index::DependencyKind::Normal),
223+
);
224+
}
225+
krate.deps.sort();
226+
versions.push(krate);
227+
}
228+
for version in versions {
229+
serde_json::to_writer(&mut body, &version).unwrap();
230+
body.push(b'\n');
231+
}
232+
fs::write(path, body)?;
233+
}
234+
235+
info!("Committing normalization");
236+
let msg = "Normalize index format\n\n\
237+
More information can be found at https://github.com/rust-lang/crates.io/pull/5066";
238+
repo.run_command(Command::new("git").args(["commit", "-am", msg]))?;
239+
240+
let branch = match args.dry_run {
241+
false => "master",
242+
true => "normalization-dry-run",
243+
};
244+
245+
info!(?branch, "Pushing to upstream repository");
246+
repo.run_command(Command::new("git").args(["push", "origin", &format!("HEAD:{branch}")]))?;
247+
248+
info!("Index normalization completed");
249+
250+
Ok(())
251+
}
252+
253+
pub fn normalize_index(dry_run: bool) -> Job {
254+
Job::NormalizeIndex(NormalizeIndexJob { dry_run })
255+
}

src/worker/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@ mod update_downloads;
1212

1313
pub use daily_db_maintenance::daily_db_maintenance;
1414
pub use dump_db::dump_db;
15-
pub use git::{add_crate, squash_index, sync_yanked};
15+
pub use git::{add_crate, normalize_index, squash_index, sync_yanked};
1616
pub use readmes::render_and_upload_readme;
1717
pub use update_downloads::update_downloads;
1818

1919
pub(crate) use daily_db_maintenance::perform_daily_db_maintenance;
2020
pub(crate) use dump_db::perform_dump_db;
2121
pub(crate) use git::{
2222
perform_index_add_crate, perform_index_squash, perform_index_sync_to_http,
23-
perform_index_update_yanked,
23+
perform_index_update_yanked, perform_normalize_index,
2424
};
2525
pub(crate) use readmes::perform_render_and_upload_readme;
2626
pub(crate) use update_downloads::perform_update_downloads;

0 commit comments

Comments
 (0)