From b01f04149cf4f107d467c03520e7152ea346714f Mon Sep 17 00:00:00 2001 From: Arlo Siemsen Date: Thu, 18 Aug 2022 13:16:22 -0500 Subject: [PATCH 01/15] DependencyKind: Implement `From` --- src/models/dependency.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/models/dependency.rs b/src/models/dependency.rs index 6e72d0ef99a..a1ce360b50d 100644 --- a/src/models/dependency.rs +++ b/src/models/dependency.rs @@ -44,6 +44,16 @@ pub enum DependencyKind { // if you add a kind here, be sure to update `from_row` below. } +impl From for DependencyKind { + fn from(dk: IndexDependencyKind) -> Self { + match dk { + IndexDependencyKind::Normal => DependencyKind::Normal, + IndexDependencyKind::Build => DependencyKind::Build, + IndexDependencyKind::Dev => DependencyKind::Dev, + } + } +} + impl From for IndexDependencyKind { fn from(dk: DependencyKind) -> Self { match dk { From c01671aa1a167d1e86f8f8b678f450e39aa07a9a Mon Sep 17 00:00:00 2001 From: Arlo Siemsen Date: Thu, 18 Aug 2022 13:16:22 -0500 Subject: [PATCH 02/15] Krate: Add `index_metadata()` method --- src/models/krate.rs | 72 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/src/models/krate.rs b/src/models/krate.rs index e4b1d540b60..4611f0e5dfa 100644 --- a/src/models/krate.rs +++ b/src/models/krate.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use chrono::NaiveDateTime; use diesel::associations::Identifiable; use diesel::pg::Pg; @@ -432,6 +434,76 @@ impl Crate { Ok(rows.records_and_total()) } + + /// Serialize the crate as an index metadata file + pub fn index_metadata(&self, conn: &mut PgConnection) -> QueryResult { + let mut versions: Vec = self.all_versions().load(conn)?; + versions.sort_by_cached_key(|k| { + semver::Version::parse(&k.num) + .expect("version was valid semver when inserted into the database") + }); + + let mut body = Vec::new(); + for version in versions { + let mut deps: Vec = version + .dependencies(conn)? + .into_iter() + .map(|(dep, name)| { + // If this dependency has an explicit name in `Cargo.toml` that + // means that the `name` we have listed is actually the package name + // that we're depending on. The `name` listed in the index is the + // Cargo.toml-written-name which is what cargo uses for + // `--extern foo=...` + let (name, package) = match dep.explicit_name { + Some(explicit_name) => (explicit_name, Some(name)), + None => (name, None), + }; + cargo_registry_index::Dependency { + name, + req: dep.req, + features: dep.features, + optional: dep.optional, + default_features: dep.default_features, + kind: Some(dep.kind.into()), + package, + target: dep.target, + } + }) + .collect(); + deps.sort(); + + let features: BTreeMap> = + serde_json::from_value(version.features).unwrap_or_default(); + let (features, features2): (BTreeMap<_, _>, BTreeMap<_, _>) = + features.into_iter().partition(|(_k, vals)| { + !vals + .iter() + .any(|v| v.starts_with("dep:") || v.contains("?/")) + }); + + let (features2, v) = if features2.is_empty() { + (None, None) + } else { + (Some(features2), Some(2)) + }; + + let krate = cargo_registry_index::Crate { + name: self.name.clone(), + vers: version.num.to_string(), + cksum: version.checksum, + yanked: Some(version.yanked), + deps, + features, + links: version.links, + features2, + v, + }; + serde_json::to_writer(&mut body, &krate).unwrap(); + body.push(b'\n'); + } + let body = String::from_utf8(body).unwrap(); + Ok(body) + } } #[cfg(test)] From 62bc0573b459617cc5aaf85f8a44ceaf82c7167e Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 20 Apr 2023 11:23:26 +0200 Subject: [PATCH 03/15] Krate::index_metadata: Return `Vec` instead of `String` Let's leave the serialization to the `cargo-registry-index` crate instead. --- src/models/krate.rs | 125 +++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 61 deletions(-) diff --git a/src/models/krate.rs b/src/models/krate.rs index 4611f0e5dfa..2699634cd80 100644 --- a/src/models/krate.rs +++ b/src/models/krate.rs @@ -435,74 +435,77 @@ impl Crate { Ok(rows.records_and_total()) } - /// Serialize the crate as an index metadata file - pub fn index_metadata(&self, conn: &mut PgConnection) -> QueryResult { + /// Gather all the necessary data to write an index metadata file + pub fn index_metadata( + &self, + conn: &mut PgConnection, + ) -> QueryResult> { let mut versions: Vec = self.all_versions().load(conn)?; versions.sort_by_cached_key(|k| { semver::Version::parse(&k.num) .expect("version was valid semver when inserted into the database") }); - let mut body = Vec::new(); - for version in versions { - let mut deps: Vec = version - .dependencies(conn)? - .into_iter() - .map(|(dep, name)| { - // If this dependency has an explicit name in `Cargo.toml` that - // means that the `name` we have listed is actually the package name - // that we're depending on. The `name` listed in the index is the - // Cargo.toml-written-name which is what cargo uses for - // `--extern foo=...` - let (name, package) = match dep.explicit_name { - Some(explicit_name) => (explicit_name, Some(name)), - None => (name, None), - }; - cargo_registry_index::Dependency { - name, - req: dep.req, - features: dep.features, - optional: dep.optional, - default_features: dep.default_features, - kind: Some(dep.kind.into()), - package, - target: dep.target, - } - }) - .collect(); - deps.sort(); - - let features: BTreeMap> = - serde_json::from_value(version.features).unwrap_or_default(); - let (features, features2): (BTreeMap<_, _>, BTreeMap<_, _>) = - features.into_iter().partition(|(_k, vals)| { - !vals - .iter() - .any(|v| v.starts_with("dep:") || v.contains("?/")) - }); - - let (features2, v) = if features2.is_empty() { - (None, None) - } else { - (Some(features2), Some(2)) - }; + versions + .into_iter() + .map(|version| { + let mut deps: Vec = version + .dependencies(conn)? + .into_iter() + .map(|(dep, name)| { + // If this dependency has an explicit name in `Cargo.toml` that + // means that the `name` we have listed is actually the package name + // that we're depending on. The `name` listed in the index is the + // Cargo.toml-written-name which is what cargo uses for + // `--extern foo=...` + let (name, package) = match dep.explicit_name { + Some(explicit_name) => (explicit_name, Some(name)), + None => (name, None), + }; + cargo_registry_index::Dependency { + name, + req: dep.req, + features: dep.features, + optional: dep.optional, + default_features: dep.default_features, + kind: Some(dep.kind.into()), + package, + target: dep.target, + } + }) + .collect(); + deps.sort(); + + let features: BTreeMap> = + serde_json::from_value(version.features).unwrap_or_default(); + let (features, features2): (BTreeMap<_, _>, BTreeMap<_, _>) = + features.into_iter().partition(|(_k, vals)| { + !vals + .iter() + .any(|v| v.starts_with("dep:") || v.contains("?/")) + }); + + let (features2, v) = if features2.is_empty() { + (None, None) + } else { + (Some(features2), Some(2)) + }; - let krate = cargo_registry_index::Crate { - name: self.name.clone(), - vers: version.num.to_string(), - cksum: version.checksum, - yanked: Some(version.yanked), - deps, - features, - links: version.links, - features2, - v, - }; - serde_json::to_writer(&mut body, &krate).unwrap(); - body.push(b'\n'); - } - let body = String::from_utf8(body).unwrap(); - Ok(body) + let krate = cargo_registry_index::Crate { + name: self.name.clone(), + vers: version.num.to_string(), + cksum: version.checksum, + yanked: Some(version.yanked), + deps, + features, + links: version.links, + features2, + v, + }; + + Ok(krate) + }) + .collect() } } From a84f2e6f9ca01767e2cdd5160fbef66affb4d93e Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 20 Apr 2023 11:24:51 +0200 Subject: [PATCH 04/15] Krate::index_metadata: Sort by `created_at` first --- src/models/krate.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/models/krate.rs b/src/models/krate.rs index 2699634cd80..11206ec3a9e 100644 --- a/src/models/krate.rs +++ b/src/models/krate.rs @@ -441,10 +441,11 @@ impl Crate { conn: &mut PgConnection, ) -> QueryResult> { let mut versions: Vec = self.all_versions().load(conn)?; - versions.sort_by_cached_key(|k| { - semver::Version::parse(&k.num) - .expect("version was valid semver when inserted into the database") - }); + + // We sort by `created_at` by default, but since tests run within a + // single database transaction the versions will all have the same + // `created_at` timestamp, so we sort by semver as a secondary key. + versions.sort_by_cached_key(|k| (k.created_at, semver::Version::parse(&k.num).ok())); versions .into_iter() From 5d6d9fef3af85735ca2ff5e3163b3d643001f89e Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 20 Apr 2023 16:23:08 +0200 Subject: [PATCH 05/15] Krate::index_metadata: Add test --- src/tests/all.rs | 1 + src/tests/models/krate.rs | 47 +++++++++++++++++++ src/tests/models/mod.rs | 1 + .../all__models__krate__index_metadata-2.snap | 36 ++++++++++++++ .../all__models__krate__index_metadata.snap | 11 +++++ 5 files changed, 96 insertions(+) create mode 100644 src/tests/models/krate.rs create mode 100644 src/tests/models/mod.rs create mode 100644 src/tests/models/snapshots/all__models__krate__index_metadata-2.snap create mode 100644 src/tests/models/snapshots/all__models__krate__index_metadata.snap diff --git a/src/tests/all.rs b/src/tests/all.rs index a1f905edefb..64dccc50931 100644 --- a/src/tests/all.rs +++ b/src/tests/all.rs @@ -34,6 +34,7 @@ mod dump_db; mod github_secret_scanning; mod krate; mod middleware; +mod models; mod not_found_error; mod owners; mod pagination; diff --git a/src/tests/models/krate.rs b/src/tests/models/krate.rs new file mode 100644 index 00000000000..403f27e3344 --- /dev/null +++ b/src/tests/models/krate.rs @@ -0,0 +1,47 @@ +use crate::builders::{CrateBuilder, VersionBuilder}; +use crate::util::insta::assert_yaml_snapshot; +use crate::TestApp; +use chrono::{Days, Utc}; + +#[test] +fn index_metadata() { + let (app, _, user) = TestApp::init().with_user(); + let user = user.as_model(); + + app.db(|conn| { + let created_at_1 = Utc::now() + .checked_sub_days(Days::new(14)) + .unwrap() + .naive_utc(); + + let created_at_2 = Utc::now() + .checked_sub_days(Days::new(7)) + .unwrap() + .naive_utc(); + + let fooo = CrateBuilder::new("foo", user.id) + .version(VersionBuilder::new("0.1.0")) + .expect_build(conn); + + let metadata = fooo.index_metadata(conn).unwrap(); + assert_yaml_snapshot!(metadata); + + let bar = CrateBuilder::new("bar", user.id) + .version( + VersionBuilder::new("1.0.0-beta.1") + .created_at(created_at_1) + .yanked(true), + ) + .version(VersionBuilder::new("1.0.0").created_at(created_at_1)) + .version( + VersionBuilder::new("2.0.0") + .created_at(created_at_2) + .dependency(&fooo, None), + ) + .version(VersionBuilder::new("1.0.1").checksum("0123456789abcdef")) + .expect_build(conn); + + let metadata = bar.index_metadata(conn).unwrap(); + assert_yaml_snapshot!(metadata); + }); +} diff --git a/src/tests/models/mod.rs b/src/tests/models/mod.rs new file mode 100644 index 00000000000..c1ac1724be5 --- /dev/null +++ b/src/tests/models/mod.rs @@ -0,0 +1 @@ +mod krate; diff --git a/src/tests/models/snapshots/all__models__krate__index_metadata-2.snap b/src/tests/models/snapshots/all__models__krate__index_metadata-2.snap new file mode 100644 index 00000000000..2afe174ace3 --- /dev/null +++ b/src/tests/models/snapshots/all__models__krate__index_metadata-2.snap @@ -0,0 +1,36 @@ +--- +source: src/tests/models/krate.rs +expression: metadata +--- +- name: bar + vers: 1.0.0-beta.1 + deps: [] + cksum: " " + features: {} + yanked: true +- name: bar + vers: 1.0.0 + deps: [] + cksum: " " + features: {} + yanked: false +- name: bar + vers: 2.0.0 + deps: + - name: foo + req: ">= 0" + features: [] + optional: false + default_features: false + target: ~ + kind: normal + cksum: " " + features: {} + yanked: false +- name: bar + vers: 1.0.1 + deps: [] + cksum: "0123456789abcdef " + features: {} + yanked: false + diff --git a/src/tests/models/snapshots/all__models__krate__index_metadata.snap b/src/tests/models/snapshots/all__models__krate__index_metadata.snap new file mode 100644 index 00000000000..8822c445111 --- /dev/null +++ b/src/tests/models/snapshots/all__models__krate__index_metadata.snap @@ -0,0 +1,11 @@ +--- +source: src/tests/models/krate.rs +expression: metadata +--- +- name: foo + vers: 0.1.0 + deps: [] + cksum: " " + features: {} + yanked: false + From b4c38c3db327bc4e82b821a4cc8d176418a64d47 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 20 Apr 2023 12:48:43 +0200 Subject: [PATCH 06/15] Krate::index_metadata: Avoid 1+N database queries to fetch the dependencies --- src/models/krate.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/models/krate.rs b/src/models/krate.rs index 11206ec3a9e..73ca029163d 100644 --- a/src/models/krate.rs +++ b/src/models/krate.rs @@ -11,7 +11,7 @@ use crate::app::App; use crate::controllers::helpers::pagination::*; use crate::models::version::TopVersions; use crate::models::{ - CrateOwner, CrateOwnerInvitation, NewCrateOwnerInvitationOutcome, Owner, OwnerKind, + CrateOwner, CrateOwnerInvitation, Dependency, NewCrateOwnerInvitationOutcome, Owner, OwnerKind, ReverseDependency, User, Version, }; use crate::util::errors::{cargo_err, AppResult}; @@ -447,11 +447,18 @@ impl Crate { // `created_at` timestamp, so we sort by semver as a secondary key. versions.sort_by_cached_key(|k| (k.created_at, semver::Version::parse(&k.num).ok())); + let deps: Vec<(Dependency, String)> = Dependency::belonging_to(&versions) + .inner_join(crates::table) + .select((dependencies::all_columns, crates::name)) + .load(conn)?; + + let deps = deps.grouped_by(&versions); + versions .into_iter() - .map(|version| { - let mut deps: Vec = version - .dependencies(conn)? + .zip(deps) + .map(|(version, deps)| { + let mut deps = deps .into_iter() .map(|(dep, name)| { // If this dependency has an explicit name in `Cargo.toml` that @@ -463,6 +470,7 @@ impl Crate { Some(explicit_name) => (explicit_name, Some(name)), None => (name, None), }; + cargo_registry_index::Dependency { name, req: dep.req, @@ -474,7 +482,8 @@ impl Crate { target: dep.target, } }) - .collect(); + .collect::>(); + deps.sort(); let features: BTreeMap> = From bd575bd6d5f3ae3a33965ccfd878ccf7ee51e730 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 20 Apr 2023 11:08:03 +0200 Subject: [PATCH 07/15] index/Crate: Implement `write_to()` method --- Cargo.lock | 1 + cargo-registry-index/Cargo.toml | 7 +++++-- cargo-registry-index/lib.rs | 34 +++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ed2f09a4829..941e1a8349f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -513,6 +513,7 @@ version = "0.0.0" dependencies = [ "anyhow", "base64 0.13.1", + "claims", "dotenv", "git2", "serde", diff --git a/cargo-registry-index/Cargo.toml b/cargo-registry-index/Cargo.toml index a76fcbbdd35..5b82529ab77 100644 --- a/cargo-registry-index/Cargo.toml +++ b/cargo-registry-index/Cargo.toml @@ -10,7 +10,7 @@ edition = "2021" path = "lib.rs" [features] -testing = ["serde_json"] +testing = [] [dependencies] anyhow = "=1.0.70" @@ -18,7 +18,10 @@ base64 = "=0.13.1" dotenv = "=0.15.0" git2 = "=0.17.1" serde = { version = "=1.0.160", features = ["derive"] } +serde_json = "=1.0.96" tempfile = "=3.5.0" tracing = "=0.1.37" url = "=2.3.1" -serde_json = { version = "=1.0.96", optional = true } + +[dev-dependencies] +claims = "=0.7.1" diff --git a/cargo-registry-index/lib.rs b/cargo-registry-index/lib.rs index 5050e1b7810..3dcc5cbfb37 100644 --- a/cargo-registry-index/lib.rs +++ b/cargo-registry-index/lib.rs @@ -148,6 +148,14 @@ pub struct Crate { pub v: Option, } +impl Crate { + pub fn write_to(&self, mut writer: W) -> anyhow::Result<()> { + serde_json::to_writer(&mut writer, self)?; + writer.write_all(b"\n")?; + Ok(()) + } +} + #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct Dependency { pub name: String, @@ -595,3 +603,29 @@ pub fn run_via_cli(command: &mut Command, credentials: &Credentials) -> anyhow:: Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use claims::*; + + #[test] + fn crate_writer() { + let krate = Crate { + name: "foo".to_string(), + vers: "1.2.3".to_string(), + deps: vec![], + cksum: "0123456789asbcdef".to_string(), + features: Default::default(), + features2: None, + yanked: None, + links: None, + v: None, + }; + let mut buffer = Vec::new(); + assert_ok!(krate.write_to(&mut buffer)); + assert_ok_eq!(String::from_utf8(buffer), "\ + {\"name\":\"foo\",\"vers\":\"1.2.3\",\"deps\":[],\"cksum\":\"0123456789asbcdef\",\"features\":{},\"yanked\":null}\n\ + "); + } +} From edcc0acfbdb63cded213cedc9bc469f92bd320ee Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 20 Apr 2023 11:14:17 +0200 Subject: [PATCH 08/15] index/Crate: Implement `write_crates()` fn --- cargo-registry-index/lib.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/cargo-registry-index/lib.rs b/cargo-registry-index/lib.rs index 3dcc5cbfb37..6e0db04c871 100644 --- a/cargo-registry-index/lib.rs +++ b/cargo-registry-index/lib.rs @@ -156,6 +156,13 @@ impl Crate { } } +pub fn write_crates(crates: &[Crate], mut writer: W) -> anyhow::Result<()> { + for krate in crates { + krate.write_to(&mut writer)?; + } + Ok(()) +} + #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct Dependency { pub name: String, @@ -628,4 +635,32 @@ mod tests { {\"name\":\"foo\",\"vers\":\"1.2.3\",\"deps\":[],\"cksum\":\"0123456789asbcdef\",\"features\":{},\"yanked\":null}\n\ "); } + + #[test] + fn test_write_crates() { + let versions = vec!["0.1.0", "1.0.0-beta.1", "1.0.0", "1.2.3"]; + let crates = versions + .into_iter() + .map(|vers| Crate { + name: "foo".to_string(), + vers: vers.to_string(), + deps: vec![], + cksum: "0123456789asbcdef".to_string(), + features: Default::default(), + features2: None, + yanked: None, + links: None, + v: None, + }) + .collect::>(); + + let mut buffer = Vec::new(); + assert_ok!(write_crates(&crates, &mut buffer)); + assert_ok_eq!(String::from_utf8(buffer), "\ + {\"name\":\"foo\",\"vers\":\"0.1.0\",\"deps\":[],\"cksum\":\"0123456789asbcdef\",\"features\":{},\"yanked\":null}\n\ + {\"name\":\"foo\",\"vers\":\"1.0.0-beta.1\",\"deps\":[],\"cksum\":\"0123456789asbcdef\",\"features\":{},\"yanked\":null}\n\ + {\"name\":\"foo\",\"vers\":\"1.0.0\",\"deps\":[],\"cksum\":\"0123456789asbcdef\",\"features\":{},\"yanked\":null}\n\ + {\"name\":\"foo\",\"vers\":\"1.2.3\",\"deps\":[],\"cksum\":\"0123456789asbcdef\",\"features\":{},\"yanked\":null}\n\ + "); + } } From ae1535f52d02d2465919bce63f42c8a433d53ea5 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 21 Apr 2023 18:14:01 +0200 Subject: [PATCH 09/15] worker: Implement `sync_to_git/sparse_index()` jobs --- src/background_jobs.rs | 29 +++++++++++++ src/worker/git.rs | 97 +++++++++++++++++++++++++++++++++++++++++- src/worker/mod.rs | 2 +- 3 files changed, 125 insertions(+), 3 deletions(-) diff --git a/src/background_jobs.rs b/src/background_jobs.rs index 3e09ed14a10..dbc4889c94d 100644 --- a/src/background_jobs.rs +++ b/src/background_jobs.rs @@ -18,6 +18,8 @@ pub enum Job { IndexAddCrate(IndexAddCrateJob), IndexSquash, IndexSyncToHttp(IndexSyncToHttpJob), + SyncToGitIndex(SyncToIndexJob), + SyncToSparseIndex(SyncToIndexJob), IndexUpdateYanked(IndexUpdateYankedJob), NormalizeIndex(NormalizeIndexJob), RenderAndUploadReadme(RenderAndUploadReadmeJob), @@ -47,8 +49,22 @@ impl Job { const INDEX_UPDATE_YANKED: &str = "sync_yanked"; const NORMALIZE_INDEX: &str = "normalize_index"; const RENDER_AND_UPLOAD_README: &str = "render_and_upload_readme"; + const SYNC_TO_GIT_INDEX: &str = "sync_to_git_index"; + const SYNC_TO_SPARSE_INDEX: &str = "sync_to_sparse_index"; const UPDATE_DOWNLOADS: &str = "update_downloads"; + pub fn sync_to_git_index(krate: T) -> Job { + Job::SyncToGitIndex(SyncToIndexJob { + krate: krate.to_string(), + }) + } + + pub fn sync_to_sparse_index(krate: T) -> Job { + Job::SyncToSparseIndex(SyncToIndexJob { + krate: krate.to_string(), + }) + } + fn as_type_str(&self) -> &'static str { match self { Job::DailyDbMaintenance => Self::DAILY_DB_MAINTENANCE, @@ -59,6 +75,8 @@ impl Job { Job::IndexUpdateYanked(_) => Self::INDEX_UPDATE_YANKED, Job::NormalizeIndex(_) => Self::NORMALIZE_INDEX, Job::RenderAndUploadReadme(_) => Self::RENDER_AND_UPLOAD_README, + Job::SyncToGitIndex(_) => Self::SYNC_TO_GIT_INDEX, + Job::SyncToSparseIndex(_) => Self::SYNC_TO_SPARSE_INDEX, Job::UpdateDownloads => Self::UPDATE_DOWNLOADS, } } @@ -73,6 +91,8 @@ impl Job { Job::IndexUpdateYanked(inner) => serde_json::to_value(inner), Job::NormalizeIndex(inner) => serde_json::to_value(inner), Job::RenderAndUploadReadme(inner) => serde_json::to_value(inner), + Job::SyncToGitIndex(inner) => serde_json::to_value(inner), + Job::SyncToSparseIndex(inner) => serde_json::to_value(inner), Job::UpdateDownloads => Ok(serde_json::Value::Null), } } @@ -101,6 +121,8 @@ impl Job { Self::INDEX_UPDATE_YANKED => Job::IndexUpdateYanked(from_value(value)?), Self::NORMALIZE_INDEX => Job::NormalizeIndex(from_value(value)?), Self::RENDER_AND_UPLOAD_README => Job::RenderAndUploadReadme(from_value(value)?), + Self::SYNC_TO_GIT_INDEX => Job::SyncToGitIndex(from_value(value)?), + Self::SYNC_TO_SPARSE_INDEX => Job::SyncToSparseIndex(from_value(value)?), Self::UPDATE_DOWNLOADS => Job::UpdateDownloads, job_type => Err(PerformError::from(format!("Unknown job type {job_type}")))?, }) @@ -136,6 +158,8 @@ impl Job { args.base_url.as_deref(), args.pkg_path_in_vcs.as_deref(), ), + Job::SyncToGitIndex(args) => worker::sync_to_git_index(env, conn, &args.krate), + Job::SyncToSparseIndex(args) => worker::sync_to_sparse_index(env, conn, &args.krate), Job::UpdateDownloads => worker::perform_update_downloads(&mut *fresh_connection(pool)?), } } @@ -172,6 +196,11 @@ pub struct IndexSyncToHttpJob { pub(super) crate_name: String, } +#[derive(Serialize, Deserialize)] +pub struct SyncToIndexJob { + pub(super) krate: String, +} + #[derive(Serialize, Deserialize)] pub struct IndexUpdateYankedJob { pub(super) krate: String, diff --git a/src/worker/git.rs b/src/worker/git.rs index 3ff7ff21ba1..1520289bead 100644 --- a/src/worker/git.rs +++ b/src/worker/git.rs @@ -1,14 +1,15 @@ use crate::background_jobs::{ Environment, IndexAddCrateJob, IndexSyncToHttpJob, IndexUpdateYankedJob, Job, NormalizeIndexJob, }; +use crate::models; use crate::schema; use crate::swirl::PerformError; use anyhow::Context; use cargo_registry_index::{Crate, Repository}; use chrono::Utc; use diesel::prelude::*; -use std::fs::{self, OpenOptions}; -use std::io::{BufRead, BufReader, ErrorKind}; +use std::fs::{self, File, OpenOptions}; +use std::io::{BufRead, BufReader, ErrorKind, Write}; use std::process::Command; #[instrument(skip_all, fields(krate.name = ?krate.name, krate.vers = ?krate.vers))] @@ -74,6 +75,98 @@ pub fn update_crate_index(crate_name: String) -> Job { Job::IndexSyncToHttp(IndexSyncToHttpJob { crate_name }) } +/// Regenerates or removes an index file for a single crate +#[instrument(skip_all, fields(krate.name = ?krate))] +pub fn sync_to_git_index( + env: &Environment, + conn: &mut PgConnection, + krate: &str, +) -> Result<(), PerformError> { + info!("Syncing to git index"); + + let new = get_index_data(krate, conn).context("Failed to get index data")?; + + let repo = env.lock_index()?; + let dst = repo.index_file(krate); + + // Read the previous crate contents + let old = match fs::read_to_string(&dst) { + Ok(content) => Some(content), + Err(error) if error.kind() == ErrorKind::NotFound => None, + Err(error) => return Err(error.into()), + }; + + match (old, new) { + (None, Some(new)) => { + fs::create_dir_all(dst.parent().unwrap())?; + let mut file = File::create(&dst)?; + file.write_all(new.as_bytes())?; + repo.commit_and_push(&format!("Creating crate `{}`", krate), &dst)?; + } + (Some(old), Some(new)) if old != new => { + let mut file = File::create(&dst)?; + file.write_all(new.as_bytes())?; + repo.commit_and_push(&format!("Updating crate `{}`", krate), &dst)?; + } + (Some(_old), None) => { + fs::remove_file(&dst)?; + repo.commit_and_push(&format!("Deleting crate `{}`", krate), &dst)?; + } + _ => debug!("Skipping sync because index is up-to-date"), + } + + Ok(()) +} + +/// Regenerates or removes an index file for a single crate +#[instrument(skip_all, fields(krate.name = ?krate))] +pub fn sync_to_sparse_index( + env: &Environment, + conn: &mut PgConnection, + krate: &str, +) -> Result<(), PerformError> { + info!("Syncing to sparse index"); + + let content = get_index_data(krate, conn).context("Failed to get index data")?; + + env.uploader + .sync_index(env.http_client(), krate, content) + .context("Failed to sync index data")?; + + if let Some(cloudfront) = env.cloudfront() { + let path = Repository::relative_index_file_for_url(krate); + + info!(%path, "Invalidating index file on CloudFront"); + cloudfront + .invalidate(env.http_client(), &path) + .context("Failed to invalidate CloudFront")?; + } + + Ok(()) +} + +#[instrument(skip_all, fields(krate.name = ?name))] +pub fn get_index_data(name: &str, conn: &mut PgConnection) -> anyhow::Result> { + debug!("Looking up crate by name"); + let Some(krate): Option = models::Crate::by_exact_name(name).first(conn).optional()? else { + return Ok(None); + }; + + debug!("Gathering remaining index data"); + let crates = krate + .index_metadata(conn) + .context("Failed to gather index metadata")?; + + debug!("Serializing index data"); + let mut bytes = Vec::new(); + cargo_registry_index::write_crates(&crates, &mut bytes) + .context("Failed to serialize index metadata")?; + + let str = String::from_utf8(bytes).context("Failed to decode index metadata as utf8")?; + + Ok(Some(str)) +} + /// Yanks or unyanks a crate version. This requires finding the index /// file, deserlialise the crate from JSON, change the yank boolean to /// `true` or `false`, write all the lines back out, and commit and diff --git a/src/worker/mod.rs b/src/worker/mod.rs index 51f6eb3f9a0..c5be80bc827 100644 --- a/src/worker/mod.rs +++ b/src/worker/mod.rs @@ -20,7 +20,7 @@ pub(crate) use daily_db_maintenance::perform_daily_db_maintenance; pub(crate) use dump_db::perform_dump_db; pub(crate) use git::{ perform_index_add_crate, perform_index_squash, perform_index_sync_to_http, - perform_index_update_yanked, perform_normalize_index, + perform_index_update_yanked, perform_normalize_index, sync_to_git_index, sync_to_sparse_index, }; pub(crate) use readmes::perform_render_and_upload_readme; pub(crate) use update_downloads::perform_update_downloads; From ef234a43b05cb522091eb1235807c0a5a44d43fb Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 21 Apr 2023 18:18:38 +0200 Subject: [PATCH 10/15] publish: Use `sync_to_git/sparse_index()` jobs if `FEATURE_INDEX_SYNC` env var is set --- src/config.rs | 2 ++ src/controllers/krate/publish.rs | 9 ++++++++- .../krate_publish_new_krate_too_big_but_whitelisted.json | 4 ++-- src/tests/http-data/krate_publish_new_krate_twice.json | 4 ++-- ...ate_publish_publish_after_removing_documentation.json | 8 ++++---- src/tests/http-data/team_publish_owned.json | 4 ++-- src/tests/util/test_app.rs | 1 + 7 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/config.rs b/src/config.rs index 736820cbb0c..e6583fd1586 100644 --- a/src/config.rs +++ b/src/config.rs @@ -47,6 +47,7 @@ pub struct Server { pub version_id_cache_ttl: Duration, pub cdn_user_agent: String, pub balance_capacity: BalanceCapacityConfig, + pub feature_index_sync: bool, } impl Default for Server { @@ -151,6 +152,7 @@ impl Default for Server { cdn_user_agent: dotenv::var("WEB_CDN_USER_AGENT") .unwrap_or_else(|_| "Amazon CloudFront".into()), balance_capacity: BalanceCapacityConfig::from_environment(), + feature_index_sync: dotenv::var("FEATURE_INDEX_SYNC").is_ok(), } } } diff --git a/src/controllers/krate/publish.rs b/src/controllers/krate/publish.rs index 9e0fe3b09d4..9de28db45e3 100644 --- a/src/controllers/krate/publish.rs +++ b/src/controllers/krate/publish.rs @@ -1,6 +1,7 @@ //! Functionality related to publishing a new crate or version of a crate. use crate::auth::AuthCheck; +use crate::background_jobs::Job; use axum::body::Bytes; use flate2::read::GzDecoder; use hex::ToHex; @@ -271,7 +272,13 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult config::Server { version_id_cache_ttl: Duration::from_secs(5 * 60), cdn_user_agent: "Amazon CloudFront".to_string(), balance_capacity: BalanceCapacityConfig::for_testing(), + feature_index_sync: true, } } From 9337fb38179cc69f83b5092c933e0753b1225bdc Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 21 Apr 2023 18:42:31 +0200 Subject: [PATCH 11/15] version/yank: Use `sync_to_git/sparse_index()` jobs if `FEATURE_INDEX_SYNC` env var is set --- src/controllers/version/yank.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/controllers/version/yank.rs b/src/controllers/version/yank.rs index 45313ee93a8..ca9d842f731 100644 --- a/src/controllers/version/yank.rs +++ b/src/controllers/version/yank.rs @@ -1,6 +1,7 @@ //! Endpoints for yanking and unyanking specific versions of crates use crate::auth::AuthCheck; +use crate::background_jobs::Job; use super::version_and_crate; use crate::controllers::cargo_prelude::*; @@ -84,7 +85,12 @@ fn modify_yank( insert_version_owner_action(conn, version.id, user.id, api_token_id, action)?; - worker::sync_yanked(krate.name, version.num).enqueue(conn)?; + if state.config.feature_index_sync { + Job::sync_to_git_index(&krate.name).enqueue(conn)?; + Job::sync_to_sparse_index(&krate.name).enqueue(conn)?; + } else { + worker::sync_yanked(krate.name, version.num).enqueue(conn)?; + } ok_true() } From e0475adbb59dd75ca8a189cd9289d205811c6eb6 Mon Sep 17 00:00:00 2001 From: Arlo Siemsen Date: Thu, 18 Aug 2022 13:16:22 -0500 Subject: [PATCH 12/15] admin/delete_version: Use `sync_to_git/sparse_index()` jobs if `FEATURE_INDEX_SYNC` env var is set This ensures that we not only remove the version from the database, but also from the index. --- src/admin/delete_version.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/admin/delete_version.rs b/src/admin/delete_version.rs index 8c4f5f02510..cc4483e4e83 100644 --- a/src/admin/delete_version.rs +++ b/src/admin/delete_version.rs @@ -1,3 +1,4 @@ +use crate::background_jobs::Job; use crate::{ admin::dialoguer, db, @@ -57,4 +58,11 @@ fn delete(opts: Opts, conn: &mut PgConnection) { if !opts.yes && !dialoguer::confirm("commit?") { panic!("aborting transaction"); } + + if dotenv::var("FEATURE_INDEX_SYNC").is_ok() { + Job::sync_to_git_index(&krate.name).enqueue(conn).unwrap(); + Job::sync_to_sparse_index(&krate.name) + .enqueue(conn) + .unwrap(); + } } From d654abb3e4343003cbf3833a45fa796126fcdb48 Mon Sep 17 00:00:00 2001 From: Arlo Siemsen Date: Thu, 18 Aug 2022 13:16:22 -0500 Subject: [PATCH 13/15] admin/delete_crate: Use `sync_to_git/sparse_index()` jobs if `FEATURE_INDEX_SYNC` env var is set This ensures that the crate is deleted from both the git index and the sparse index, and that the CloudFront invalidation is correctly sent out. --- src/admin/delete_crate.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/admin/delete_crate.rs b/src/admin/delete_crate.rs index 10571ae6c63..203fe7ba962 100644 --- a/src/admin/delete_crate.rs +++ b/src/admin/delete_crate.rs @@ -1,3 +1,4 @@ +use crate::background_jobs::Job; use crate::{admin::dialoguer, config, db, models::Crate, schema::crates}; use diesel::prelude::*; @@ -54,5 +55,12 @@ fn delete(opts: Opts, conn: &mut PgConnection) { panic!("aborting transaction"); } - uploader.delete_index(&client, &krate.name).unwrap(); + if dotenv::var("FEATURE_INDEX_SYNC").is_ok() { + Job::sync_to_git_index(&krate.name).enqueue(conn).unwrap(); + Job::sync_to_sparse_index(&krate.name) + .enqueue(conn) + .unwrap(); + } else { + uploader.delete_index(&client, &krate.name).unwrap(); + } } From 1c0a95b441fd603d3af0df49399cfd6a06b548f2 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 21 Apr 2023 18:42:24 +0200 Subject: [PATCH 14/15] admin/yank_version: Use `sync_to_git/sparse_index()` jobs if `FEATURE_INDEX_SYNC` env var is set --- src/admin/yank_version.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/admin/yank_version.rs b/src/admin/yank_version.rs index 7ec3c0115f1..ac0d4247d9b 100644 --- a/src/admin/yank_version.rs +++ b/src/admin/yank_version.rs @@ -5,6 +5,7 @@ use crate::{ schema::versions, }; +use crate::background_jobs::Job; use diesel::prelude::*; #[derive(clap::Parser, Debug)] @@ -64,7 +65,14 @@ fn yank(opts: Opts, conn: &mut PgConnection) { .execute(conn) .unwrap(); - crate::worker::sync_yanked(krate.name, v.num) - .enqueue(conn) - .unwrap(); + if dotenv::var("FEATURE_INDEX_SYNC").is_ok() { + Job::sync_to_git_index(&krate.name).enqueue(conn).unwrap(); + Job::sync_to_sparse_index(&krate.name) + .enqueue(conn) + .unwrap(); + } else { + crate::worker::sync_yanked(krate.name, v.num) + .enqueue(conn) + .unwrap(); + } } From f669528d293990ffe4ff6d0c1651a5cf7cb87802 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 21 Apr 2023 21:29:58 +0200 Subject: [PATCH 15/15] Extract `Job::enqueue_sync_to_index()` fn --- src/admin/delete_crate.rs | 5 +---- src/admin/delete_version.rs | 5 +---- src/admin/yank_version.rs | 5 +---- src/background_jobs.rs | 25 +++++++++++++++++++++++++ src/controllers/krate/publish.rs | 3 +-- src/controllers/version/yank.rs | 3 +-- 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/admin/delete_crate.rs b/src/admin/delete_crate.rs index 203fe7ba962..ad1b5407f29 100644 --- a/src/admin/delete_crate.rs +++ b/src/admin/delete_crate.rs @@ -56,10 +56,7 @@ fn delete(opts: Opts, conn: &mut PgConnection) { } if dotenv::var("FEATURE_INDEX_SYNC").is_ok() { - Job::sync_to_git_index(&krate.name).enqueue(conn).unwrap(); - Job::sync_to_sparse_index(&krate.name) - .enqueue(conn) - .unwrap(); + Job::enqueue_sync_to_index(&krate.name, conn).unwrap(); } else { uploader.delete_index(&client, &krate.name).unwrap(); } diff --git a/src/admin/delete_version.rs b/src/admin/delete_version.rs index cc4483e4e83..b67842565b5 100644 --- a/src/admin/delete_version.rs +++ b/src/admin/delete_version.rs @@ -60,9 +60,6 @@ fn delete(opts: Opts, conn: &mut PgConnection) { } if dotenv::var("FEATURE_INDEX_SYNC").is_ok() { - Job::sync_to_git_index(&krate.name).enqueue(conn).unwrap(); - Job::sync_to_sparse_index(&krate.name) - .enqueue(conn) - .unwrap(); + Job::enqueue_sync_to_index(&krate.name, conn).unwrap(); } } diff --git a/src/admin/yank_version.rs b/src/admin/yank_version.rs index ac0d4247d9b..ee8c108cba6 100644 --- a/src/admin/yank_version.rs +++ b/src/admin/yank_version.rs @@ -66,10 +66,7 @@ fn yank(opts: Opts, conn: &mut PgConnection) { .unwrap(); if dotenv::var("FEATURE_INDEX_SYNC").is_ok() { - Job::sync_to_git_index(&krate.name).enqueue(conn).unwrap(); - Job::sync_to_sparse_index(&krate.name) - .enqueue(conn) - .unwrap(); + Job::enqueue_sync_to_index(&krate.name, conn).unwrap(); } else { crate::worker::sync_yanked(krate.name, v.num) .enqueue(conn) diff --git a/src/background_jobs.rs b/src/background_jobs.rs index dbc4889c94d..f0470979f08 100644 --- a/src/background_jobs.rs +++ b/src/background_jobs.rs @@ -53,6 +53,31 @@ impl Job { const SYNC_TO_SPARSE_INDEX: &str = "sync_to_sparse_index"; const UPDATE_DOWNLOADS: &str = "update_downloads"; + pub fn enqueue_sync_to_index( + krate: T, + conn: &mut PgConnection, + ) -> Result<(), EnqueueError> { + use crate::schema::background_jobs::dsl::*; + + let to_git = Self::sync_to_git_index(krate.to_string()); + let to_git = ( + job_type.eq(to_git.as_type_str()), + data.eq(to_git.to_value()?), + ); + + let to_sparse = Self::sync_to_sparse_index(krate.to_string()); + let to_sparse = ( + job_type.eq(to_sparse.as_type_str()), + data.eq(to_sparse.to_value()?), + ); + + diesel::insert_into(background_jobs) + .values(vec![to_git, to_sparse]) + .execute(conn)?; + + Ok(()) + } + pub fn sync_to_git_index(krate: T) -> Job { Job::SyncToGitIndex(SyncToIndexJob { krate: krate.to_string(), diff --git a/src/controllers/krate/publish.rs b/src/controllers/krate/publish.rs index 9de28db45e3..6cae3e46313 100644 --- a/src/controllers/krate/publish.rs +++ b/src/controllers/krate/publish.rs @@ -274,8 +274,7 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult