From daf0751ee08da8a48f182e321d8fb45c52ee03c9 Mon Sep 17 00:00:00 2001 From: Sean Griffin Date: Fri, 22 Mar 2019 11:01:45 -0600 Subject: [PATCH 1/2] Define `recent_crate_downloads` in terms of `version_downloads` The only place we're ever using `crate_downloads` directly is in a place that we should have been using `recent_crate_downloads`. Once we make that change, `crate_downloads` is only ever used to populate `recent_crate_downloads`, which can just as easily be populated from `version_downloads` instead. Populating it from `version_downloads` is a bit slower since the table is so much larger, and we don't have an index on it which we can use for this query. However, this doesn't really matter since we run this asynchronously. Now that we're on PG 11, I also intend to partition `version_downloads` on date which will make populating the view much faster once again. Note that I have not removed `crate_downloads` yet. That needs to be deployed separately, as it's a destructive change which would break running code, so we can't run that migration until the code which is no longer relying on `crate_downloads` is deployed --- .../down.sql | 6 ++ .../up.sql | 8 +++ src/bin/update-downloads.rs | 28 +--------- src/controllers/krate/metadata.rs | 14 ++--- src/models.rs | 2 +- src/models/krate.rs | 11 ++-- src/tests/builders.rs | 56 ++++++++----------- 7 files changed, 50 insertions(+), 75 deletions(-) create mode 100644 migrations/2019-03-22-162022_define_recent_crate_downloads_in_terms_of_version_downloads/down.sql create mode 100644 migrations/2019-03-22-162022_define_recent_crate_downloads_in_terms_of_version_downloads/up.sql diff --git a/migrations/2019-03-22-162022_define_recent_crate_downloads_in_terms_of_version_downloads/down.sql b/migrations/2019-03-22-162022_define_recent_crate_downloads_in_terms_of_version_downloads/down.sql new file mode 100644 index 00000000000..6ef215bc5a2 --- /dev/null +++ b/migrations/2019-03-22-162022_define_recent_crate_downloads_in_terms_of_version_downloads/down.sql @@ -0,0 +1,6 @@ +DROP MATERIALIZED VIEW recent_crate_downloads; +CREATE MATERIALIZED VIEW recent_crate_downloads (crate_id, downloads) AS + SELECT crate_id, SUM(downloads) FROM crate_downloads + WHERE date > date(CURRENT_TIMESTAMP - INTERVAL '90 days') + GROUP BY crate_id; +CREATE UNIQUE INDEX recent_crate_downloads_crate_id ON recent_crate_downloads (crate_id); diff --git a/migrations/2019-03-22-162022_define_recent_crate_downloads_in_terms_of_version_downloads/up.sql b/migrations/2019-03-22-162022_define_recent_crate_downloads_in_terms_of_version_downloads/up.sql new file mode 100644 index 00000000000..ba704a2ad05 --- /dev/null +++ b/migrations/2019-03-22-162022_define_recent_crate_downloads_in_terms_of_version_downloads/up.sql @@ -0,0 +1,8 @@ +DROP MATERIALIZED VIEW recent_crate_downloads; +CREATE MATERIALIZED VIEW recent_crate_downloads (crate_id, downloads) AS + SELECT crate_id, SUM(version_downloads.downloads) FROM version_downloads + INNER JOIN versions + ON version_downloads.version_id = versions.id + WHERE version_downloads.date > date(CURRENT_TIMESTAMP - INTERVAL '90 days') + GROUP BY crate_id; +CREATE UNIQUE INDEX recent_crate_downloads_crate_id ON recent_crate_downloads (crate_id); diff --git a/src/bin/update-downloads.rs b/src/bin/update-downloads.rs index b31394d5e57..bf526afcf64 100644 --- a/src/bin/update-downloads.rs +++ b/src/bin/update-downloads.rs @@ -6,7 +6,7 @@ extern crate diesel; use cargo_registry::{ db, models::VersionDownload, - schema::{crate_downloads, crates, metadata, version_downloads, versions}, + schema::{crates, metadata, version_downloads, versions}, util::CargoResult, }; @@ -53,7 +53,7 @@ fn update(conn: &PgConnection) -> QueryResult<()> { } fn collect(conn: &PgConnection, rows: &[VersionDownload]) -> QueryResult<()> { - use diesel::{insert_into, update}; + use diesel::update; println!("updating {} versions", rows.len()); @@ -77,18 +77,6 @@ fn collect(conn: &PgConnection, rows: &[VersionDownload]) -> QueryResult<()> { .set(crates::downloads.eq(crates::downloads + amt)) .execute(conn)?; - // Update the total number of crate downloads for today - insert_into(crate_downloads::table) - .values(( - crate_downloads::crate_id.eq(crate_id), - crate_downloads::downloads.eq(amt), - crate_downloads::date.eq(download.date), - )) - .on_conflict(crate_downloads::table.primary_key()) - .do_update() - .set(crate_downloads::downloads.eq(crate_downloads::downloads + amt)) - .execute(conn)?; - // Now that everything else for this crate is done, update the global counter of total // downloads update(metadata::table) @@ -144,16 +132,6 @@ mod test { (krate, version) } - macro_rules! crate_downloads { - ($conn:expr, $id:expr, $expected:expr) => { - let dl = crate_downloads::table - .filter(crate_downloads::crate_id.eq($id)) - .select(crate_downloads::downloads) - .first($conn); - assert_eq!(Ok($expected as i32), dl); - }; - } - #[test] fn increment() { use diesel::dsl::*; @@ -185,7 +163,6 @@ mod test { .select(crates::downloads) .first(&conn); assert_eq!(Ok(1), crate_downloads); - crate_downloads!(&conn, krate.id, 1); crate::update(&conn).unwrap(); let version_downloads = versions::table .find(version.id) @@ -298,7 +275,6 @@ mod test { .unwrap(); assert_eq!(krate2.downloads, 2); assert_eq!(krate2.updated_at, krate_before.updated_at); - crate_downloads!(&conn, krate.id, 1); crate::update(&conn).unwrap(); let version3 = versions::table .find(version.id) diff --git a/src/controllers/krate/metadata.rs b/src/controllers/krate/metadata.rs index 726d377a8d6..04ba2c128c3 100644 --- a/src/controllers/krate/metadata.rs +++ b/src/controllers/krate/metadata.rs @@ -6,8 +6,8 @@ use crate::controllers::prelude::*; use crate::models::{ - Category, Crate, CrateCategory, CrateDownload, CrateKeyword, CrateVersions, Keyword, User, - Version, + Category, Crate, CrateCategory, CrateKeyword, CrateVersions, Keyword, RecentCrateDownloads, + User, Version, }; use crate::schema::*; use crate::views::{ @@ -101,8 +101,6 @@ pub fn summary(req: &mut dyn Request) -> CargoResult { /// Handles the `GET /crates/:crate_id` route. pub fn show(req: &mut dyn Request) -> CargoResult { - use diesel::dsl::*; - let name = &req.params()["crate_id"]; let conn = req.db_conn()?; let krate = Crate::by_name(name).first::(&*conn)?; @@ -123,10 +121,10 @@ pub fn show(req: &mut dyn Request) -> CargoResult { .inner_join(categories::table) .select(categories::all_columns) .load(&*conn)?; - let recent_downloads = CrateDownload::belonging_to(&krate) - .filter(crate_downloads::date.gt(date(now - 90.days()))) - .select(sum(crate_downloads::downloads)) - .get_result(&*conn)?; + let recent_downloads = RecentCrateDownloads::belonging_to(&krate) + .select(recent_crate_downloads::downloads) + .get_result(&*conn) + .optional()?; let badges = badges::table .filter(badges::crate_id.eq(krate.id)) diff --git a/src/models.rs b/src/models.rs index 1a5aca4f33f..9968177d294 100644 --- a/src/models.rs +++ b/src/models.rs @@ -6,7 +6,7 @@ pub use self::download::VersionDownload; pub use self::email::{Email, NewEmail}; pub use self::follow::Follow; pub use self::keyword::{CrateKeyword, Keyword}; -pub use self::krate::{Crate, CrateDownload, CrateVersions, NewCrate}; +pub use self::krate::{Crate, CrateVersions, NewCrate, RecentCrateDownloads}; pub use self::owner::{CrateOwner, Owner, OwnerKind}; pub use self::rights::Rights; pub use self::team::{NewTeam, Team}; diff --git a/src/models/krate.rs b/src/models/krate.rs index 030c6c39911..022f30d7fe8 100644 --- a/src/models/krate.rs +++ b/src/models/krate.rs @@ -1,4 +1,4 @@ -use chrono::{NaiveDate, NaiveDateTime}; +use chrono::NaiveDateTime; use diesel::associations::Identifiable; use diesel::pg::Pg; use diesel::prelude::*; @@ -20,14 +20,13 @@ use crate::schema::*; /// and are possibly of malicious intent e.g. ad tracking networks, etc. const DOCUMENTATION_BLOCKLIST: [&str; 1] = ["rust-ci.org"]; -#[derive(Debug, Insertable, Queryable, Identifiable, Associations, AsChangeset, Clone, Copy)] +#[derive(Debug, Queryable, Identifiable, Associations, Clone, Copy)] #[belongs_to(Crate)] -#[primary_key(crate_id, date)] -#[table_name = "crate_downloads"] -pub struct CrateDownload { +#[primary_key(crate_id)] +#[table_name = "recent_crate_downloads"] +pub struct RecentCrateDownloads { pub crate_id: i32, pub downloads: i32, - pub date: NaiveDate, } #[derive(Debug, Clone, Queryable, Identifiable, Associations, AsChangeset, QueryableByName)] diff --git a/src/tests/builders.rs b/src/tests/builders.rs index 6dac66c995d..31a51878933 100644 --- a/src/tests/builders.rs +++ b/src/tests/builders.rs @@ -1,14 +1,13 @@ //! Structs using the builder pattern that make it easier to create records in tests. use cargo_registry::{ - models::{Crate, CrateDownload, Keyword, NewCrate, NewVersion, Version}, - schema::{crate_downloads, dependencies, versions}, + models::{Crate, Keyword, NewCrate, NewVersion, Version}, + schema::{crates, dependencies, version_downloads, versions}, util::CargoResult, views::krate_publish as u, }; use std::{collections::HashMap, io::Read}; -use chrono::Utc; use diesel::prelude::*; use flate2::{write::GzEncoder, Compression}; @@ -240,45 +239,34 @@ impl<'a> CrateBuilder<'a> { // Since we are using `NewCrate`, we can't set all the // crate properties in a single DB call. - let old_downloads = self.downloads.unwrap_or(0) - self.recent_downloads.unwrap_or(0); - let now = Utc::now(); - let old_date = now.naive_utc().date() - chrono::Duration::days(91); - if let Some(downloads) = self.downloads { - let crate_download = CrateDownload { - crate_id: krate.id, - downloads: old_downloads, - date: old_date, - }; - - insert_into(crate_downloads::table) - .values(&crate_download) - .execute(connection)?; - krate.downloads = downloads; - update(&krate).set(&krate).execute(connection)?; - } - - if self.recent_downloads.is_some() { - let crate_download = CrateDownload { - crate_id: krate.id, - downloads: self.recent_downloads.unwrap(), - date: now.naive_utc().date(), - }; - - insert_into(crate_downloads::table) - .values(&crate_download) - .execute(connection)?; - - no_arg_sql_function!(refresh_recent_crate_downloads, ()); - select(refresh_recent_crate_downloads).execute(connection)?; + krate = update(&krate) + .set(crates::downloads.eq(downloads)) + .returning(cargo_registry::models::krate::ALL_COLUMNS) + .get_result(connection)?; } if self.versions.is_empty() { self.versions.push(VersionBuilder::new("0.99.0")); } + let mut last_version_id = 0; for version_builder in self.versions { - version_builder.build(krate.id, self.owner_id, connection)?; + last_version_id = version_builder + .build(krate.id, self.owner_id, connection)? + .id; + } + + if let Some(downloads) = self.recent_downloads { + insert_into(version_downloads::table) + .values(( + version_downloads::version_id.eq(last_version_id), + version_downloads::downloads.eq(downloads), + )) + .execute(connection)?; + + no_arg_sql_function!(refresh_recent_crate_downloads, ()); + select(refresh_recent_crate_downloads).execute(connection)?; } if !self.keywords.is_empty() { From f6702c63429ffeb8c43abac50f7c2e5ffb7e4be7 Mon Sep 17 00:00:00 2001 From: Sean Griffin Date: Fri, 22 Mar 2019 11:12:40 -0600 Subject: [PATCH 2/2] Remove `crate_downloads` No part of our code base relies on this table any longer, so we can just remove it. --- .../down.sql | 15 +++++++++ .../up.sql | 3 ++ src/schema.patch | 3 +- src/schema.rs | 31 ------------------- 4 files changed, 19 insertions(+), 33 deletions(-) create mode 100644 migrations/2019-03-22-170700_remove_crate_downloads/down.sql create mode 100644 migrations/2019-03-22-170700_remove_crate_downloads/up.sql diff --git a/migrations/2019-03-22-170700_remove_crate_downloads/down.sql b/migrations/2019-03-22-170700_remove_crate_downloads/down.sql new file mode 100644 index 00000000000..d636c3376f3 --- /dev/null +++ b/migrations/2019-03-22-170700_remove_crate_downloads/down.sql @@ -0,0 +1,15 @@ +CREATE TABLE crate_downloads ( + crate_id INTEGER NOT NULL REFERENCES crates (id), + downloads INTEGER NOT NULL, + date DATE NOT NULL, + PRIMARY KEY (crate_id, date) +); +CREATE INDEX "index_crate_downloads_crate_id" ON crate_downloads (crate_id); +CREATE INDEX "index_crate_downloads_date" ON crate_downloads (date); + +INSERT INTO crate_downloads (crate_id, downloads, date) + SELECT crate_id, sum(version_downloads.downloads), date + FROM version_downloads + INNER JOIN versions + ON version_downloads.version_id = versions.id + GROUP BY (crate_id, date); diff --git a/migrations/2019-03-22-170700_remove_crate_downloads/up.sql b/migrations/2019-03-22-170700_remove_crate_downloads/up.sql new file mode 100644 index 00000000000..e6603aee3f4 --- /dev/null +++ b/migrations/2019-03-22-170700_remove_crate_downloads/up.sql @@ -0,0 +1,3 @@ +-- ALERT! DESTRUCTIVE MIGRATION +-- DO NOT DEPLOY UNTIL PREVIOUS COMMIT IS DEPLOYED +DROP TABLE crate_downloads; diff --git a/src/schema.patch b/src/schema.patch index 0a754fa2202..1a65430ec4c 100644 --- a/src/schema.patch +++ b/src/schema.patch @@ -55,11 +55,10 @@ index df884e4..18e08cd 100644 /// Representation of the `reserved_crate_names` table. /// -@@ -881,22 +901,24 @@ table! { +@@ -881,21 +901,23 @@ table! { } joinable!(api_tokens -> users (user_id)); - joinable!(crate_downloads -> crates (crate_id)); joinable!(crate_owner_invitations -> crates (crate_id)); joinable!(crate_owners -> crates (crate_id)); -joinable!(crate_owners -> users (created_by)); diff --git a/src/schema.rs b/src/schema.rs index c89bd7d4eba..527584fe51c 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -176,35 +176,6 @@ table! { } } -table! { - use diesel::sql_types::*; - use diesel_full_text_search::{TsVector as Tsvector}; - - /// Representation of the `crate_downloads` table. - /// - /// (Automatically generated by Diesel.) - crate_downloads (crate_id, date) { - /// The `crate_id` column of the `crate_downloads` table. - /// - /// Its SQL type is `Int4`. - /// - /// (Automatically generated by Diesel.) - crate_id -> Int4, - /// The `downloads` column of the `crate_downloads` table. - /// - /// Its SQL type is `Int4`. - /// - /// (Automatically generated by Diesel.) - downloads -> Int4, - /// The `date` column of the `crate_downloads` table. - /// - /// Its SQL type is `Date`. - /// - /// (Automatically generated by Diesel.) - date -> Date, - } -} - table! { use diesel::sql_types::*; use diesel_full_text_search::{TsVector as Tsvector}; @@ -956,7 +927,6 @@ table! { } joinable!(api_tokens -> users (user_id)); -joinable!(crate_downloads -> crates (crate_id)); joinable!(crate_owner_invitations -> crates (crate_id)); joinable!(crate_owners -> crates (crate_id)); joinable!(crate_owners -> teams (owner_id)); @@ -984,7 +954,6 @@ allow_tables_to_appear_in_same_query!( background_jobs, badges, categories, - crate_downloads, crate_owner_invitations, crate_owners, crates,