Skip to content

Extract crate_downloads table #8232

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions migrations/2024-03-04-092044_crate-downloads-table/down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
drop trigger insert_crate_downloads_row on crates;
drop function insert_crate_downloads_row;
drop table crate_downloads;
39 changes: 39 additions & 0 deletions migrations/2024-03-04-092044_crate-downloads-table/up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
-- Create the `crate_downloads` table.

create table crate_downloads
(
crate_id integer not null
constraint crate_downloads_pk
primary key
constraint crate_downloads_crates_id_fk
references crates
on delete cascade,
downloads bigint default 0 not null
);

comment on table crate_downloads is 'Number of downloads per crate. This was extracted from the `crates` table for performance reasons.';
comment on column crate_downloads.crate_id is 'Reference to the crate that this row belongs to.';
comment on column crate_downloads.downloads is 'The total number of downloads for this crate.';

-- Create a trigger to automatically add a row to `crate_downloads` when a new
-- crate is inserted into the `crates` table.

create or replace function insert_crate_downloads_row() returns trigger as $$
begin
insert into crate_downloads(crate_id) values (new.id);
return new;
end;
$$ language plpgsql;

create trigger insert_crate_downloads_row
after insert on crates
for each row
execute function insert_crate_downloads_row();

-- The following query can take a couple of seconds so it should be run manually
-- outside of the migration to prevent the server from taking a long time to
-- start up while waiting for the migration to complete.

-- insert into crate_downloads (crate_id, downloads)
-- select id, downloads
-- from crates;
20 changes: 11 additions & 9 deletions src/schema.patch
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
--- src/schema.rs.orig 2024-03-04 10:34:35
+++ src/schema.rs 2024-03-04 10:33:35
@@ -21,9 +21,7 @@
/// The `pg_catalog.tsvector` SQL type
///
Expand All @@ -7,7 +9,7 @@
- pub struct Tsvector;
+ pub use diesel_full_text_search::Tsvector;
}

diesel::table! {
@@ -74,9 +72,9 @@
/// (Automatically generated by Diesel.)
Expand All @@ -33,8 +35,8 @@
- path -> Ltree,
}
}

@@ -456,7 +448,7 @@
@@ -476,7 +468,7 @@
/// Its SQL type is `Array<Nullable<Text>>`.
///
/// (Automatically generated by Diesel.)
Expand All @@ -43,9 +45,9 @@
/// The `target` column of the `dependencies` table.
///
/// Its SQL type is `Nullable<Varchar>`.
@@ -683,6 +675,24 @@
@@ -703,6 +695,24 @@
}

diesel::table! {
+ /// Representation of the `recent_crate_downloads` view.
+ ///
Expand All @@ -68,8 +70,8 @@
/// Representation of the `reserved_crate_names` table.
///
/// (Automatically generated by Diesel.)
@@ -997,7 +1007,8 @@
diesel::joinable!(api_tokens -> users (user_id));
@@ -1018,7 +1028,8 @@
diesel::joinable!(crate_downloads -> crates (crate_id));
diesel::joinable!(crate_owner_invitations -> crates (crate_id));
diesel::joinable!(crate_owners -> crates (crate_id));
-diesel::joinable!(crate_owners -> users (created_by));
Expand All @@ -78,15 +80,15 @@
diesel::joinable!(crates_categories -> categories (category_id));
diesel::joinable!(crates_categories -> crates (crate_id));
diesel::joinable!(crates_keywords -> crates (crate_id));
@@ -1010,6 +1021,7 @@
@@ -1031,6 +1042,7 @@
diesel::joinable!(publish_limit_buckets -> users (user_id));
diesel::joinable!(publish_rate_overrides -> users (user_id));
diesel::joinable!(readme_renderings -> versions (version_id));
+diesel::joinable!(recent_crate_downloads -> crates (crate_id));
diesel::joinable!(version_downloads -> versions (version_id));
diesel::joinable!(version_owner_actions -> api_tokens (api_token_id));
diesel::joinable!(version_owner_actions -> users (user_id));
@@ -1036,6 +1048,7 @@
@@ -1058,6 +1070,7 @@
publish_limit_buckets,
publish_rate_overrides,
readme_renderings,
Expand Down
12 changes: 12 additions & 0 deletions src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,16 @@ diesel::table! {
}
}

diesel::table! {
/// Number of downloads per crate. This was extracted from the `crates` table for performance reasons.
crate_downloads (crate_id) {
/// Reference to the crate that this row belongs to.
crate_id -> Int4,
/// The total number of downloads for this crate.
downloads -> Int8,
}
}

diesel::table! {
/// Representation of the `crate_owner_invitations` table.
///
Expand Down Expand Up @@ -1005,6 +1015,7 @@ diesel::table! {
}

diesel::joinable!(api_tokens -> users (user_id));
diesel::joinable!(crate_downloads -> crates (crate_id));
diesel::joinable!(crate_owner_invitations -> crates (crate_id));
diesel::joinable!(crate_owners -> crates (crate_id));
diesel::joinable!(crate_owners -> teams (owner_id));
Expand Down Expand Up @@ -1034,6 +1045,7 @@ diesel::allow_tables_to_appear_in_same_query!(
api_tokens,
background_jobs,
categories,
crate_downloads,
crate_owner_invitations,
crate_owners,
crates,
Expand Down
5 changes: 5 additions & 0 deletions src/tests/builders/krate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crates_io::{
};

use chrono::NaiveDateTime;
use crates_io::schema::crate_downloads;
use diesel::prelude::*;

use super::VersionBuilder;
Expand Down Expand Up @@ -125,6 +126,10 @@ impl<'a> CrateBuilder<'a> {
// crate properties in a single DB call.

if let Some(downloads) = self.downloads {
update(crate_downloads::table.filter(crate_downloads::crate_id.eq(krate.id)))
.set(crate_downloads::downloads.eq(downloads as i64))
.execute(connection)?;

krate = update(&krate)
.set(crates::downloads.eq(downloads))
.returning(Crate::as_returning())
Expand Down
30 changes: 29 additions & 1 deletion src/worker/jobs/downloads/update_metadata.rs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we expand the test data for crates and their versions to ensure we are updating the correct one?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess that would generally make sense. Since that was already a problem with the existing code I don't want to conflate that with the purpose of this PR though :)

Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ fn batch_update(batch_size: i64, conn: &mut PgConnection) -> QueryResult<i64> {
SET downloads = crates.downloads + crate_downloads_batch.downloads
FROM crate_downloads_batch
WHERE crates.id = crate_downloads_batch.crate_id
), updated_crate_downloads AS (
-- Update the `downloads` count for each crate in the `crate_downloads` table.
UPDATE crate_downloads
SET downloads = crate_downloads.downloads + crate_downloads_batch.downloads
FROM crate_downloads_batch
WHERE crate_downloads.crate_id = crate_downloads_batch.crate_id
), updated_metadata AS (
-- Update the `total_downloads` count in the `metadata` table.
UPDATE metadata
Expand Down Expand Up @@ -170,7 +176,7 @@ mod tests {
use super::*;
use crate::email::Emails;
use crate::models::{Crate, NewCrate, NewUser, NewVersion, User, Version};
use crate::schema::{crates, versions};
use crate::schema::{crate_downloads, crates, versions};
use crate::test_util::test_db_connection;
use std::collections::BTreeMap;

Expand Down Expand Up @@ -224,17 +230,27 @@ mod tests {
.unwrap();

super::update(conn).unwrap();

let version_downloads = versions::table
.find(version.id)
.select(versions::downloads)
.first(conn);
assert_eq!(version_downloads, Ok(1));

let crate_downloads = crates::table
.find(krate.id)
.select(crates::downloads)
.first(conn);
assert_eq!(crate_downloads, Ok(1));

let crate_downloads = crate_downloads::table
.find(krate.id)
.select(crate_downloads::downloads)
.first(conn);
assert_eq!(crate_downloads, Ok(1));

super::update(conn).unwrap();

let version_downloads = versions::table
.find(version.id)
.select(versions::downloads)
Expand Down Expand Up @@ -330,17 +346,29 @@ mod tests {
.filter(crates::id.eq(krate.id))
.first(conn)
.unwrap();

super::update(conn).unwrap();

let version2: Version = versions::table.find(version.id).first(conn).unwrap();
assert_eq!(version2.downloads, 2);
assert_eq!(version2.updated_at, version_before.updated_at);

let krate2: Crate = Crate::all()
.filter(crates::id.eq(krate.id))
.first(conn)
.unwrap();
assert_eq!(krate2.downloads, 2);
assert_eq!(krate2.updated_at, krate_before.updated_at);

let krate2_downloads: i64 = crate_downloads::table
.find(krate.id)
.select(crate_downloads::downloads)
.first(conn)
.unwrap();
assert_eq!(krate2_downloads, 2);

super::update(conn).unwrap();

let version3: Version = versions::table.find(version.id).first(conn).unwrap();
assert_eq!(version3.downloads, 2);
}
Expand Down
4 changes: 4 additions & 0 deletions src/worker/jobs/dump_db/dump-db.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ crates_cnt = "public"
created_at = "public"
path = "public"

[crate_downloads.columns]
crate_id = "public"
downloads = "public"

[crate_owner_invitations.columns]
invited_user_id = "private"
invited_by_user_id = "private"
Expand Down