diff --git a/Cargo.lock b/Cargo.lock index 9881223c65b..033617e4d15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,7 +32,7 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "cipher", "cpufeatures", "opaque-debug 0.3.0", @@ -129,7 +129,7 @@ checksum = "321629d8ba6513061f26707241fa9bc89524ff1cd7a915a97ef0c62c666ce1b6" dependencies = [ "addr2line", "cc", - "cfg-if", + "cfg-if 1.0.0", "libc", "miniz_oxide", "object", @@ -196,6 +196,12 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" +[[package]] +name = "bytecount" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e" + [[package]] name = "byteorder" version = "1.4.3" @@ -208,6 +214,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" +[[package]] +name = "cargo-platform" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbdb825da8a5df079a43676dbe042702f1707b1109f713a01420fbb4cc71fa27" +dependencies = [ + "serde", +] + [[package]] name = "cargo-registry" version = "0.2.2" @@ -249,6 +264,7 @@ dependencies = [ "lettre", "license-exprs", "minijinja", + "moka", "oauth2", "parking_lot", "prometheus", @@ -295,6 +311,19 @@ dependencies = [ "sha-1 0.9.8", ] +[[package]] +name = "cargo_metadata" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714a157da7991e23d90686b9524b9e12e0407a108647f52e9328f4b3d51ac7f" +dependencies = [ + "cargo-platform", + "semver 0.11.0", + "semver-parser 0.10.2", + "serde", + "serde_json", +] + [[package]] name = "cc" version = "1.0.71" @@ -304,6 +333,12 @@ dependencies = [ "jobserver", ] +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.0" @@ -574,7 +609,53 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils 0.8.5", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" +dependencies = [ + "autocfg", + "cfg-if 0.1.10", + "crossbeam-utils 0.7.2", + "lazy_static", + "maybe-uninit", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" +dependencies = [ + "autocfg", + "cfg-if 0.1.10", + "lazy_static", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" +dependencies = [ + "cfg-if 1.0.0", + "lazy_static", ] [[package]] @@ -612,7 +693,7 @@ version = "4.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "num_cpus", ] @@ -756,7 +837,7 @@ version = "0.8.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a74ea89a0a1b98f6332de42c95baff457ada66d1cb4030f9ff151b2041a1c746" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -765,6 +846,15 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca" +[[package]] +name = "error-chain" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" +dependencies = [ + "version_check", +] + [[package]] name = "fake-simd" version = "0.1.2" @@ -786,7 +876,7 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "975ccf83d8d9d0d84682850a38c8169027be83368805971cc4f238c2b245bc98" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "redox_syscall", "winapi", @@ -798,7 +888,7 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "crc32fast", "libc", "miniz_oxide", @@ -941,7 +1031,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "wasi 0.9.0+wasi-snapshot-preview1", ] @@ -952,7 +1042,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "js-sys", "libc", "wasi 0.10.0+wasi-snapshot-preview1", @@ -1001,6 +1091,12 @@ dependencies = [ "url", ] +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + [[package]] name = "h2" version = "0.3.7" @@ -1200,7 +1296,7 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -1348,7 +1444,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -1357,6 +1453,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "mach" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +dependencies = [ + "libc", +] + [[package]] name = "maplit" version = "1.0.2" @@ -1410,12 +1515,27 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" +[[package]] +name = "maybe-uninit" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" + [[package]] name = "memchr" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "memoffset" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "043175f069eda7b85febe4a74abbaeff828d9f8b448515d3151a14a3542811aa" +dependencies = [ + "autocfg", +] + [[package]] name = "migrations_internals" version = "1.4.1" @@ -1490,6 +1610,34 @@ dependencies = [ "winapi", ] +[[package]] +name = "moka" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0d5cf145315e1c07d9d67f07e12276902d86a952dac392cee7b895303b699d" +dependencies = [ + "crossbeam-channel", + "moka-cht", + "num_cpus", + "once_cell", + "parking_lot", + "quanta", + "scheduled-thread-pool", + "skeptic", + "thiserror", + "uuid", +] + +[[package]] +name = "moka-cht" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829e4b3f52dff046d1824842169dc0a91319f5e923e20a651b4e85697e03feea" +dependencies = [ + "crossbeam-epoch", + "num_cpus", +] + [[package]] name = "native-tls" version = "0.2.8" @@ -1617,7 +1765,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7ae222234c30df141154f159066c5093ff73b63204dcda7121eb082fc56a95" dependencies = [ "bitflags", - "cfg-if", + "cfg-if 1.0.0", "foreign-types", "libc", "once_cell", @@ -1669,7 +1817,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "instant", "libc", "redox_syscall", @@ -1826,7 +1974,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8419d2b623c7c0896ff2d5d96e2cb4ede590fed28fcc34934f4c33c036e620a1" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "cpufeatures", "opaque-debug 0.3.0", "universal-hash", @@ -1904,7 +2052,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7f64969ffd5dd8f39bd57a68ac53c163a095ed9d0fb707146da1b27025a3504" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "fnv", "lazy_static", "memchr", @@ -1912,6 +2060,33 @@ dependencies = [ "thiserror", ] +[[package]] +name = "pulldown-cmark" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8" +dependencies = [ + "bitflags", + "memchr", + "unicase", +] + +[[package]] +name = "quanta" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20afe714292d5e879d8b12740aa223c6a88f118af41870e8b6196e39a02238a8" +dependencies = [ + "crossbeam-utils 0.8.5", + "libc", + "mach", + "once_cell", + "raw-cpuid", + "wasi 0.10.0+wasi-snapshot-preview1", + "web-sys", + "winapi", +] + [[package]] name = "quote" version = "1.0.10" @@ -2029,6 +2204,15 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "raw-cpuid" +version = "10.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "929f54e29691d4e6a9cc558479de70db7aa3d98cd6fe7ab86d7507aa2886b9d2" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_syscall" version = "0.2.10" @@ -2156,6 +2340,15 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.19" @@ -2210,7 +2403,17 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" dependencies = [ - "semver-parser", + "semver-parser 0.7.0", +] + +[[package]] +name = "semver" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" +dependencies = [ + "semver-parser 0.10.2", + "serde", ] [[package]] @@ -2228,6 +2431,15 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" +[[package]] +name = "semver-parser" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" +dependencies = [ + "pest", +] + [[package]] name = "sentry" version = "0.23.0" @@ -2403,7 +2615,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99cd6713db3cf16b6c84e06321e049a9b9f699826e16096d23bbcc44d15d51a6" dependencies = [ "block-buffer 0.9.0", - "cfg-if", + "cfg-if 1.0.0", "cpufeatures", "digest 0.9.0", "opaque-debug 0.3.0", @@ -2422,7 +2634,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b69f9a4c9740d74c5baa3fd2e547f9525fa8088a8a958e0ca2409a514e33f5fa" dependencies = [ "block-buffer 0.9.0", - "cfg-if", + "cfg-if 1.0.0", "cpufeatures", "digest 0.9.0", "opaque-debug 0.3.0", @@ -2458,6 +2670,21 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b" +[[package]] +name = "skeptic" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "188b810342d98f23f0bb875045299f34187b559370b041eb11520c905370a888" +dependencies = [ + "bytecount", + "cargo_metadata", + "error-chain", + "glob", + "pulldown-cmark", + "tempfile", + "walkdir", +] + [[package]] name = "slab" version = "0.4.5" @@ -2628,7 +2855,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "rand 0.8.4", "redox_syscall", @@ -2901,7 +3128,7 @@ version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -3118,6 +3345,17 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + [[package]] name = "want" version = "0.3.0" @@ -3146,7 +3384,7 @@ version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "wasm-bindgen-macro", ] @@ -3171,7 +3409,7 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e8d7523cb1f2a4c96c1317ca690031b714a51cc14e05f712446691f413f5d39" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "js-sys", "wasm-bindgen", "web-sys", diff --git a/Cargo.toml b/Cargo.toml index c9ccb419e2a..156e715b504 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,6 +65,7 @@ tikv-jemallocator = { version = "0.4.1", features = ['unprefixed_malloc_on_suppo lettre = { version = "0.10.0-rc.4", default-features = false, features = ["file-transport", "smtp-transport", "native-tls", "hostname", "builder"] } license-exprs = "1.6" minijinja = "0.8.1" +moka = "0.6.1" oauth2 = { version = "4.1.0", default-features = false, features = ["reqwest"] } parking_lot = "0.11" prometheus = { version = "0.13.0", default-features = false } diff --git a/src/app.rs b/src/app.rs index a0ba91e2916..9a15e0f1f44 100644 --- a/src/app.rs +++ b/src/app.rs @@ -9,6 +9,7 @@ use crate::email::Emails; use crate::github::GitHubClient; use crate::metrics::{InstanceMetrics, ServiceMetrics}; use diesel::r2d2; +use moka::sync::{Cache, CacheBuilder}; use oauth2::basic::BasicClient; use reqwest::blocking::Client; use scheduled_thread_pool::ScheduledThreadPool; @@ -31,6 +32,12 @@ pub struct App { /// The server configuration pub config: config::Server, + /// Cache the `version_id` of a `canonical_crate_name:semver` pair + /// + /// This is used by the download endpoint to reduce the number of database queries. The + /// `version_id` is only cached under the canonical spelling of the crate name. + pub(crate) version_id_cacher: Cache<(String, String), i32>, + /// Count downloads and periodically persist them in the database pub downloads_counter: DownloadsCounter, @@ -148,12 +155,17 @@ impl App { None }; + let version_id_cacher = CacheBuilder::new(config.version_id_cache_size) + .time_to_live(config.version_id_cache_ttl) + .build(); + App { primary_database, read_only_replica_database: replica_database, github, github_oauth, config, + version_id_cacher, downloads_counter: DownloadsCounter::new(), emails: Emails::from_environment(), service_metrics: ServiceMetrics::new().expect("could not initialize service metrics"), diff --git a/src/config.rs b/src/config.rs index b70eef15e0c..a2a36c27a33 100644 --- a/src/config.rs +++ b/src/config.rs @@ -7,6 +7,10 @@ mod database_pools; pub use self::base::Base; pub use self::database_pools::DatabasePools; use std::collections::HashSet; +use std::time::Duration; + +const DEFAULT_VERSION_ID_CACHE_SIZE: usize = 10_000; +const DEFAULT_VERSION_ID_CACHE_TTL: u64 = 5 * 60; // 5 minutes pub struct Server { pub base: Base, @@ -30,6 +34,8 @@ pub struct Server { pub instance_metrics_log_every_seconds: Option, pub force_unconditional_redirects: bool, pub blocked_routes: HashSet, + pub version_id_cache_size: usize, + pub version_id_cache_ttl: Duration, } impl Default for Server { @@ -107,6 +113,11 @@ impl Default for Server { blocked_routes: env_optional("BLOCKED_ROUTES") .map(|routes: String| routes.split(',').map(|s| s.into()).collect()) .unwrap_or_else(HashSet::new), + version_id_cache_size: env_optional("VERSION_ID_CACHE_SIZE") + .unwrap_or(DEFAULT_VERSION_ID_CACHE_SIZE), + version_id_cache_ttl: Duration::from_secs( + env_optional("VERSION_ID_CACHE_TTL").unwrap_or(DEFAULT_VERSION_ID_CACHE_TTL), + ), } } } diff --git a/src/controllers/version/downloads.rs b/src/controllers/version/downloads.rs index 92fca6e47ae..997baabccc1 100644 --- a/src/controllers/version/downloads.rs +++ b/src/controllers/version/downloads.rs @@ -18,75 +18,88 @@ pub fn download(req: &mut dyn RequestExt) -> EndpointResult { let mut crate_name = req.params()["crate_id"].clone(); let version = req.params()["version"].as_str(); - // When no database connection is ready unconditional redirects will be performed. This could - // happen if the pool is not healthy or if an operator manually configured the application to - // always perform unconditional redirects (for example as part of the mitigations for an - // outage). See the comments below for a description of what unconditional redirects do. - let conn = if app.config.force_unconditional_redirects { - None - } else { - match req.db_conn() { - Ok(conn) => Some(conn), - Err(PoolError::UnhealthyPool) => None, - Err(err) => return Err(err.into()), - } - }; - let mut log_metadata = None; - if let Some(conn) = &conn { - use self::versions::dsl::*; - - // Returns the crate name as stored in the database, or an error if we could - // not load the version ID from the database. - let (version_id, canonical_crate_name) = app - .instance_metrics - .downloads_select_query_execution_time - .observe_closure_duration(|| { - versions - .inner_join(crates::table) - .select((id, crates::name)) - .filter(Crate::with_name(&crate_name)) - .filter(num.eq(version)) - .first::<(i32, String)>(&**conn) - })?; - - if canonical_crate_name != crate_name { - app.instance_metrics - .downloads_non_canonical_crate_name_total - .inc(); - log_metadata = Some(("bot", "dl")); - } - crate_name = canonical_crate_name; + + let cache_key = (crate_name.to_string(), version.to_string()); + if let Some(version_id) = app.version_id_cacher.get(&cache_key) { + app.instance_metrics.version_id_cache_hits.inc(); // The increment does not happen instantly, but it's deferred to be executed in a batch // along with other downloads. See crate::downloads_counter for the implementation. app.downloads_counter.increment(version_id); } else { - // The download endpoint is the most critical route in the whole crates.io application, - // as it's relied upon by users and automations to download crates. Keeping it working - // is the most important thing for us. - // - // The endpoint relies on the database to fetch the canonical crate name (with the - // right capitalization and hyphenation), but that's only needed to serve clients who - // don't call the endpoint with the crate's canonical name. - // - // Thankfully Cargo always uses the right name when calling the endpoint, and we can - // keep it working during a full database outage by unconditionally redirecting without - // checking whether the crate exists or the rigth name is used. Non-Cargo clients might - // get a 404 response instead of a 500, but that's worth it. - // - // Without a working database we also can't count downloads, but that's also less - // critical than keeping Cargo downloads operational. - - app.instance_metrics - .downloads_unconditional_redirects_total - .inc(); - log_metadata = Some(("unconditional_redirect", "true")); - } + app.instance_metrics.version_id_cache_misses.inc(); + + // When no database connection is ready unconditional redirects will be performed. This could + // happen if the pool is not healthy or if an operator manually configured the application to + // always perform unconditional redirects (for example as part of the mitigations for an + // outage). See the comments below for a description of what unconditional redirects do. + let conn = if app.config.force_unconditional_redirects { + None + } else { + match req.db_conn() { + Ok(conn) => Some(conn), + Err(PoolError::UnhealthyPool) => None, + Err(err) => return Err(err.into()), + } + }; + + if let Some(conn) = &conn { + use self::versions::dsl::*; + + // Returns the crate name as stored in the database, or an error if we could + // not load the version ID from the database. + let (version_id, canonical_crate_name) = app + .instance_metrics + .downloads_select_query_execution_time + .observe_closure_duration(|| { + versions + .inner_join(crates::table) + .select((id, crates::name)) + .filter(Crate::with_name(&crate_name)) + .filter(num.eq(version)) + .first::<(i32, String)>(&**conn) + })?; + + if canonical_crate_name != crate_name { + app.instance_metrics + .downloads_non_canonical_crate_name_total + .inc(); + log_metadata = Some(("bot", "dl")); + crate_name = canonical_crate_name; + } else { + // The version_id is only cached if the provided crate name was canonical. + // Non-canonical requests fallback to the "slow" path with a DB query, but + // we typically only get a few hundred non-canonical requests in a day anyway. + app.version_id_cacher.insert(cache_key, version_id); + } + + // The increment does not happen instantly, but it's deferred to be executed in a batch + // along with other downloads. See crate::downloads_counter for the implementation. + app.downloads_counter.increment(version_id); + } else { + // The download endpoint is the most critical route in the whole crates.io application, + // as it's relied upon by users and automations to download crates. Keeping it working + // is the most important thing for us. + // + // The endpoint relies on the database to fetch the canonical crate name (with the + // right capitalization and hyphenation), but that's only needed to serve clients who + // don't call the endpoint with the crate's canonical name. + // + // Thankfully Cargo always uses the right name when calling the endpoint, and we can + // keep it working during a full database outage by unconditionally redirecting without + // checking whether the crate exists or the rigth name is used. Non-Cargo clients might + // get a 404 response instead of a 500, but that's worth it. + // + // Without a working database we also can't count downloads, but that's also less + // critical than keeping Cargo downloads operational. - // Ensure the connection is released to the pool as soon as possible, as the download endpoint - // covers the majority of our traffic and we don't want it to starve other requests. - drop(conn); + app.instance_metrics + .downloads_unconditional_redirects_total + .inc(); + log_metadata = Some(("unconditional_redirect", "true")); + } + }; let redirect_url = req .app() diff --git a/src/metrics/instance.rs b/src/metrics/instance.rs index bed355dbcaf..4bba8f8483a 100644 --- a/src/metrics/instance.rs +++ b/src/metrics/instance.rs @@ -50,6 +50,11 @@ metrics! { pub downloads_select_query_execution_time: Histogram, /// Number of download requests that are not counted yet. downloads_not_counted_total: IntGauge, + + /// Number of version ID cache hits on the download endpoint. + pub version_id_cache_hits: IntCounter, + /// Number of version ID cache misses on the download endpoint. + pub version_id_cache_misses: IntCounter, } // All instance metrics will be prefixed with this namespace. diff --git a/src/tests/krate/downloads.rs b/src/tests/krate/downloads.rs index d66bcb77ce7..bbc97137dba 100644 --- a/src/tests/krate/downloads.rs +++ b/src/tests/krate/downloads.rs @@ -1,5 +1,5 @@ use crate::builders::{CrateBuilder, VersionBuilder}; -use crate::util::{RequestHelper, TestApp}; +use crate::util::{MockAnonymousUser, RequestHelper, TestApp}; use cargo_registry::views::EncodableVersionDownload; use chrono::{Duration, Utc}; use http::StatusCode; @@ -9,6 +9,35 @@ struct Downloads { version_downloads: Vec, } +fn persist_downloads_count(app: &TestApp) { + app.as_inner() + .downloads_counter + .persist_all_shards(app.as_inner()) + .expect("failed to persist downloads count") + .log(); +} + +#[track_caller] +fn assert_dl_count( + anon: &MockAnonymousUser, + name_and_version: &str, + query: Option<&str>, + count: i32, +) { + let url = format!("/api/v1/crates/{}/downloads", name_and_version); + let downloads: Downloads = if let Some(query) = query { + anon.get_with_query(&url, query).good() + } else { + anon.get(&url).good() + }; + let total_downloads = downloads + .version_downloads + .iter() + .map(|vd| vd.downloads) + .sum::(); + assert_eq!(total_downloads, count); +} + #[test] fn download() { let (app, anon, user) = TestApp::init().with_user(); @@ -20,21 +49,6 @@ fn download() { .expect_build(conn); }); - let assert_dl_count = |name_and_version: &str, query: Option<&str>, count: i32| { - let url = format!("/api/v1/crates/{}/downloads", name_and_version); - let downloads: Downloads = if let Some(query) = query { - anon.get_with_query(&url, query).good() - } else { - anon.get(&url).good() - }; - let total_downloads = downloads - .version_downloads - .iter() - .map(|vd| vd.downloads) - .sum::(); - assert_eq!(total_downloads, count); - }; - let download = |name_and_version: &str| { let url = format!("/api/v1/crates/{}/download", name_and_version); let response = anon.get::<()>(&url); @@ -42,38 +56,30 @@ fn download() { // TODO: test the with_json code path }; - let persist_downloads_count = || { - app.as_inner() - .downloads_counter - .persist_all_shards(app.as_inner()) - .expect("failed to persist downloads count") - .log(); - }; - download("foo_download/1.0.0"); // No downloads are counted until the counters are persisted - assert_dl_count("foo_download/1.0.0", None, 0); - assert_dl_count("foo_download", None, 0); - persist_downloads_count(); + assert_dl_count(&anon, "foo_download/1.0.0", None, 0); + assert_dl_count(&anon, "foo_download", None, 0); + persist_downloads_count(&app); // Now that the counters are persisted the download counts show up. - assert_dl_count("foo_download/1.0.0", None, 1); - assert_dl_count("foo_download", None, 1); + assert_dl_count(&anon, "foo_download/1.0.0", None, 1); + assert_dl_count(&anon, "foo_download", None, 1); download("FOO_DOWNLOAD/1.0.0"); - persist_downloads_count(); - assert_dl_count("FOO_DOWNLOAD/1.0.0", None, 2); - assert_dl_count("FOO_DOWNLOAD", None, 2); + persist_downloads_count(&app); + assert_dl_count(&anon, "FOO_DOWNLOAD/1.0.0", None, 2); + assert_dl_count(&anon, "FOO_DOWNLOAD", None, 2); let yesterday = (Utc::today() + Duration::days(-1)).format("%F"); let query = format!("before_date={}", yesterday); - assert_dl_count("FOO_DOWNLOAD/1.0.0", Some(&query), 0); + assert_dl_count(&anon, "FOO_DOWNLOAD/1.0.0", Some(&query), 0); // crate/downloads always returns the last 90 days and ignores date params - assert_dl_count("FOO_DOWNLOAD", Some(&query), 2); + assert_dl_count(&anon, "FOO_DOWNLOAD", Some(&query), 2); let tomorrow = (Utc::today() + Duration::days(1)).format("%F"); let query = format!("before_date={}", tomorrow); - assert_dl_count("FOO_DOWNLOAD/1.0.0", Some(&query), 2); - assert_dl_count("FOO_DOWNLOAD", Some(&query), 2); + assert_dl_count(&anon, "FOO_DOWNLOAD/1.0.0", Some(&query), 2); + assert_dl_count(&anon, "FOO_DOWNLOAD", Some(&query), 2); } #[test] @@ -136,3 +142,38 @@ fn force_unconditional_redirect() { anon.get::<()>("/api/v1/crates/bar-download/1.0.0/download") .assert_redirect_ends_with("/crates/bar-download/bar-download-1.0.0.crate"); } + +#[test] +fn download_caches_version_id() { + use cargo_registry::schema::crates::dsl::*; + use diesel::prelude::*; + + let (app, anon, user) = TestApp::init().with_user(); + let user = user.as_model(); + + app.db(|conn| { + CrateBuilder::new("foo_download", user.id) + .version(VersionBuilder::new("1.0.0")) + .expect_build(conn); + }); + + anon.get::<()>("/api/v1/crates/foo_download/1.0.0/download") + .assert_redirect_ends_with("/crates/foo_download/foo_download-1.0.0.crate"); + + // Rename the crate, so that `foo_download` will not be found if its version_id was not cached + app.db(|conn| { + diesel::update(crates.filter(name.eq("foo_download"))) + .set(name.eq("other")) + .execute(conn) + .unwrap(); + }); + + // This would result in a 404 if the endpoint tried to read from the database + anon.get::<()>("/api/v1/crates/foo_download/1.0.0/download") + .assert_redirect_ends_with("/crates/foo_download/foo_download-1.0.0.crate"); + + // Downloads are persisted by version_id, so the rename doesn't matter + persist_downloads_count(&app); + // Check download count against the new name, rather than rename it back to the original value + assert_dl_count(&anon, "other/1.0.0", None, 2); +} diff --git a/src/tests/util/test_app.rs b/src/tests/util/test_app.rs index 0ea7de4795a..d00ece78a7c 100644 --- a/src/tests/util/test_app.rs +++ b/src/tests/util/test_app.rs @@ -337,6 +337,8 @@ fn simple_config() -> config::Server { instance_metrics_log_every_seconds: None, force_unconditional_redirects: false, blocked_routes: HashSet::new(), + version_id_cache_size: 10000, + version_id_cache_ttl: Duration::from_secs(5 * 60), } }