Skip to content

Add tool to import from git index into the database #5112

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions src/admin/git_import.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
use std::{
fs::File,
io::{BufRead, BufReader},
thread,
time::Duration,
};

use anyhow::Context;
use cargo_registry_index::{Repository, RepositoryConfig};
use diesel::prelude::*;
use indicatif::{ProgressBar, ProgressIterator, ProgressStyle};

use crate::{
admin::dialoguer,
db,
schema::{crates, dependencies, versions},
};

// Command-line options for the `git-import` admin subcommand; the value is
// passed to `run` below.
//
// NOTE(review): plain `//` comments are used here on purpose. `///` doc
// comments on clap-derived items are used by clap as help text, so adding or
// editing them would change the program's output — confirm before converting.
#[derive(clap::Parser, Debug, Copy, Clone)]
#[clap(
name = "git-import",
about = "Import missing fields from git into the database"
)]
pub struct Opts {
// `run` sleeps for this many milliseconds once per index file it visits.
// NOTE(review): no default is declared, so `--delay` is presumably mandatory
// on every invocation — confirm against the clap version in use.
/// Time in milliseconds to sleep between crate updates to reduce database load.
#[clap(long)]
delay: u64,
}

pub fn run(opts: Opts) -> anyhow::Result<()> {
let conn = db::oneoff_connection().unwrap();
println!("fetching git repo");
let config = RepositoryConfig::from_environment();
let repo = Repository::open(&config)?;
repo.reset_head()?;
println!("HEAD is at {}", repo.head_oid()?);
let files = repo.get_files_modified_since(None)?;
println!("found {} crates", files.len());
if !dialoguer::confirm("continue?") {
return Ok(());
}

let pb = ProgressBar::new(files.len() as u64);
pb.set_style(ProgressStyle::with_template("{bar:60} ({pos}/{len}, ETA {eta})").unwrap());

for file in files.iter().progress_with(pb) {
thread::sleep(Duration::from_millis(opts.delay));
let crate_name = file.file_name().unwrap().to_str().unwrap();
let path = repo.index_file(crate_name);
if !path.exists() {
continue;
}
let file = File::open(path)?;
let reader = BufReader::new(file);
for line in reader.lines() {
let krate: cargo_registry_index::Crate = serde_json::from_str(&line?)?;
conn.transaction(|| {
import_data(&conn, &krate)
.with_context(|| format!("failed to update crate: {krate:?}"))
})?;
}
}

Ok(())
}

/// Copies the fields of one git-index entry into the matching database rows.
///
/// The `versions` row is located by crate name (`krate.name`) and version
/// number (`krate.vers`); its `checksum` and `links` columns are then
/// overwritten with `krate.cksum` and `krate.links`. Afterwards, for every
/// index dependency that carries a `package` value, one matching
/// `dependencies` row whose `explicit_name` is still NULL gets its
/// `explicit_name` set to `dep.name`.
///
/// # Errors
///
/// Every diesel error is passed through unchanged. Both lookups use `first`,
/// so a missing row is reported as an error rather than skipped.
///
/// NOTE(review): once a dependency's `explicit_name` has been filled, the
/// `is_null` filter no longer matches it, so re-running this for the same
/// version looks like it would fail with a not-found error — confirm whether
/// that is intended.
fn import_data(conn: &PgConnection, krate: &cargo_registry_index::Crate) -> QueryResult<()> {
    let version_lookup = versions::table
        .inner_join(crates::table)
        .filter(crates::name.eq(&krate.name))
        .filter(versions::num.eq(&krate.vers))
        .select(versions::id);
    let version_id: i32 = version_lookup.first(conn)?;

    // Backfill `checksum` and `links` on the version row.
    let version_row = versions::table.filter(versions::id.eq(version_id));
    diesel::update(version_row)
        .set((
            versions::checksum.eq(&krate.cksum),
            versions::links.eq(&krate.links),
        ))
        .execute(conn)?;

    // Only dependencies with a `package` value need an `explicit_name`.
    let renamed_deps = krate
        .deps
        .iter()
        .filter_map(|dep| dep.package.as_ref().map(|package| (dep, package)));

    for (dep, package) in renamed_deps {
        let kind = dep.kind.map(|k| k as i32).unwrap_or_default();

        // Two dependency rows can be identical in the database, differing in
        // git only by the value being filled in here. Restricting the lookup
        // to rows whose `explicit_name` is still NULL and taking the `first`
        // hit means each pass claims a different row, so every dependency
        // ends up with exactly one explicit name.
        let dependency_id: i32 = dependencies::table
            .inner_join(crates::table)
            .filter(dependencies::explicit_name.is_null())
            .filter(dependencies::version_id.eq(version_id))
            .filter(dependencies::req.eq(&dep.req))
            .filter(dependencies::features.eq(&dep.features))
            .filter(dependencies::optional.eq(&dep.optional))
            .filter(dependencies::default_features.eq(&dep.default_features))
            .filter(dependencies::target.is_not_distinct_from(&dep.target))
            .filter(dependencies::kind.eq(kind))
            .filter(crates::name.eq(package))
            .select(dependencies::id)
            .first(conn)?;

        let dependency_row = dependencies::table.filter(dependencies::id.eq(dependency_id));
        diesel::update(dependency_row)
            .set(dependencies::explicit_name.eq(&dep.name))
            .execute(conn)?;
    }

    Ok(())
}
1 change: 1 addition & 0 deletions src/admin/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod delete_crate;
pub mod delete_version;
pub mod dialoguer;
pub mod git_import;
pub mod migrate;
pub mod on_call;
pub mod populate;
Expand Down
4 changes: 3 additions & 1 deletion src/bin/crates-admin.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![warn(clippy::all, rust_2018_idioms)]

use cargo_registry::admin::{
delete_crate, delete_version, migrate, populate, render_readmes, test_pagerduty,
delete_crate, delete_version, git_import, migrate, populate, render_readmes, test_pagerduty,
transfer_crates, upload_index, verify_token, yank_version,
};

Expand All @@ -24,6 +24,7 @@ enum SubCommand {
Migrate(migrate::Opts),
UploadIndex(upload_index::Opts),
YankVersion(yank_version::Opts),
GitImport(git_import::Opts),
}

fn main() -> anyhow::Result<()> {
Expand All @@ -42,6 +43,7 @@ fn main() -> anyhow::Result<()> {
SubCommand::Migrate(opts) => migrate::run(opts)?,
SubCommand::UploadIndex(opts) => upload_index::run(opts)?,
SubCommand::YankVersion(opts) => yank_version::run(opts),
SubCommand::GitImport(opts) => git_import::run(opts)?,
}

Ok(())
Expand Down