Skip to content

Commit 3c448ad

Browse files
committed
Auto merge of #3775 - pietroalbini:more-histograms, r=Turbo87
Add more histograms This PR adds more histograms to the metrics we collect, exposing information that could be useful when we get paged. Two metrics were added: * `cratesio_instance_database_time_to_obtain_connection`: how long it takes to obtain a database connection * `cratesio_instance_downloads_select_query_execution_time`: how long the `SELECT` in the download endpoint takes A side effect of this is that `TimingRecorder` is not needed anymore: we used it to measure those two things, appending them in the logs for slow requests, but now that we have metrics there is no need to have it. If we need to measure more timings in the future we can just add more histograms. This PR thus removes `TimingRecorder` from the codebase. r? `@jtgeibel` or `@Turbo87` This can be reviewed commit-by-commit.
2 parents 10aec2f + 2fff66c commit 3c448ad

File tree

6 files changed

+57
-77
lines changed

6 files changed

+57
-77
lines changed

src/app.rs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ impl App {
6262
pub fn new(config: config::Server, http_client: Option<Client>) -> App {
6363
use oauth2::{AuthUrl, ClientId, ClientSecret, TokenUrl};
6464

65+
let instance_metrics =
66+
InstanceMetrics::new().expect("could not initialize instance metrics");
67+
6568
let github = GitHubClient::new(http_client.clone(), config.gh_base_url.clone());
6669

6770
let github_oauth = BasicClient::new(
@@ -116,7 +119,14 @@ impl App {
116119
.connection_customizer(Box::new(primary_db_connection_config))
117120
.thread_pool(thread_pool.clone());
118121

119-
DieselPool::new(&config.db.primary.url, primary_db_config).unwrap()
122+
DieselPool::new(
123+
&config.db.primary.url,
124+
primary_db_config,
125+
instance_metrics
126+
.database_time_to_obtain_connection
127+
.with_label_values(&["primary"]),
128+
)
129+
.unwrap()
120130
};
121131

122132
let replica_database = if let Some(url) = config.db.replica.as_ref().map(|c| &c.url) {
@@ -135,7 +145,16 @@ impl App {
135145
.connection_customizer(Box::new(replica_db_connection_config))
136146
.thread_pool(thread_pool);
137147

138-
Some(DieselPool::new(url, replica_db_config).unwrap())
148+
Some(
149+
DieselPool::new(
150+
url,
151+
replica_db_config,
152+
instance_metrics
153+
.database_time_to_obtain_connection
154+
.with_label_values(&["follower"]),
155+
)
156+
.unwrap(),
157+
)
139158
}
140159
} else {
141160
None
@@ -150,8 +169,7 @@ impl App {
150169
downloads_counter: DownloadsCounter::new(),
151170
emails: Emails::from_environment(),
152171
service_metrics: ServiceMetrics::new().expect("could not initialize service metrics"),
153-
instance_metrics: InstanceMetrics::new()
154-
.expect("could not initialize instance metrics"),
172+
instance_metrics,
155173
http_client,
156174
}
157175
}

src/controllers.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ mod prelude {
1919

2020
pub use crate::db::RequestTransaction;
2121
pub use crate::middleware::app::RequestApp;
22-
pub use crate::middleware::log_request::TimingRecorder;
2322
pub use crate::util::errors::{cargo_err, AppError, AppResult, ChainError}; // TODO: Remove cargo_err from here
2423
pub use crate::util::{AppResponse, EndpointResult};
2524

@@ -39,7 +38,6 @@ mod prelude {
3938
fn query_with_params(&self, params: IndexMap<String, String>) -> String;
4039

4140
fn log_metadata<V: std::fmt::Display>(&mut self, key: &'static str, value: V);
42-
fn timing_recorder(&mut self) -> TimingRecorder;
4341
}
4442

4543
impl<'a> RequestUtils for dyn RequestExt + 'a {
@@ -80,17 +78,6 @@ mod prelude {
8078
fn log_metadata<V: std::fmt::Display>(&mut self, key: &'static str, value: V) {
8179
crate::middleware::log_request::add_custom_metadata(self, key, value);
8280
}
83-
84-
fn timing_recorder(&mut self) -> TimingRecorder {
85-
if let Some(recorder) = self.extensions().find::<TimingRecorder>() {
86-
recorder.clone()
87-
} else {
88-
let recorder = TimingRecorder::new();
89-
self.mut_extensions()
90-
.insert::<TimingRecorder>(recorder.clone());
91-
recorder
92-
}
93-
}
9481
}
9582
}
9683

src/controllers/version/downloads.rs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,28 @@ use chrono::{Duration, NaiveDate, Utc};
1414
/// This returns a URL to the location where the crate is stored.
1515
pub fn download(req: &mut dyn RequestExt) -> EndpointResult {
1616
let app = req.app().clone();
17-
let recorder = req.timing_recorder();
1817

1918
let mut crate_name = req.params()["crate_id"].clone();
2019
let version = req.params()["version"].as_str();
2120

2221
let mut log_metadata = None;
23-
match recorder.record("get_conn", || req.db_conn()) {
22+
match req.db_conn() {
2423
Ok(conn) => {
2524
use self::versions::dsl::*;
2625

2726
// Returns the crate name as stored in the database, or an error if we could
2827
// not load the version ID from the database.
29-
let (version_id, canonical_crate_name) = recorder.record("get_version", || {
30-
versions
31-
.inner_join(crates::table)
32-
.select((id, crates::name))
33-
.filter(Crate::with_name(&crate_name))
34-
.filter(num.eq(version))
35-
.first::<(i32, String)>(&*conn)
36-
})?;
28+
let (version_id, canonical_crate_name) = app
29+
.instance_metrics
30+
.downloads_select_query_execution_time
31+
.observe_closure_duration(|| {
32+
versions
33+
.inner_join(crates::table)
34+
.select((id, crates::name))
35+
.filter(Crate::with_name(&crate_name))
36+
.filter(num.eq(version))
37+
.first::<(i32, String)>(&*conn)
38+
})?;
3739

3840
if canonical_crate_name != crate_name {
3941
app.instance_metrics

src/db.rs

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use conduit::RequestExt;
22
use diesel::prelude::*;
33
use diesel::r2d2::{self, ConnectionManager, CustomizeConnection};
44
use parking_lot::{ReentrantMutex, ReentrantMutexGuard};
5+
use prometheus::Histogram;
56
use std::sync::Arc;
67
use std::{ops::Deref, time::Duration};
78
use url::Url;
@@ -10,14 +11,18 @@ use crate::middleware::app::RequestApp;
1011

1112
#[derive(Clone)]
1213
pub enum DieselPool {
13-
Pool(r2d2::Pool<ConnectionManager<PgConnection>>),
14+
Pool {
15+
pool: r2d2::Pool<ConnectionManager<PgConnection>>,
16+
time_to_obtain_connection_metric: Histogram,
17+
},
1418
Test(Arc<ReentrantMutex<PgConnection>>),
1519
}
1620

1721
impl DieselPool {
1822
pub(crate) fn new(
1923
url: &str,
2024
config: r2d2::Builder<ConnectionManager<PgConnection>>,
25+
time_to_obtain_connection_metric: Histogram,
2126
) -> Result<DieselPool, PoolError> {
2227
let manager = ConnectionManager::new(connection_url(url));
2328

@@ -32,7 +37,10 @@ impl DieselPool {
3237
// serving errors for the first connections until the pool is initialized) and if we can't
3338
// establish any connection continue booting up the application. The database pool will
3439
// automatically be marked as unhealthy and the rest of the application will adapt.
35-
let pool = DieselPool::Pool(config.build_unchecked(manager));
40+
let pool = DieselPool::Pool {
41+
pool: config.build_unchecked(manager),
42+
time_to_obtain_connection_metric,
43+
};
3644
match pool.wait_until_healthy(Duration::from_secs(5)) {
3745
Ok(()) => {}
3846
Err(PoolError::UnhealthyPool) => {}
@@ -52,22 +60,25 @@ impl DieselPool {
5260

5361
pub fn get(&self) -> Result<DieselPooledConn<'_>, PoolError> {
5462
match self {
55-
DieselPool::Pool(pool) => {
63+
DieselPool::Pool {
64+
pool,
65+
time_to_obtain_connection_metric,
66+
} => time_to_obtain_connection_metric.observe_closure_duration(|| {
5667
if let Some(conn) = pool.try_get() {
5768
Ok(DieselPooledConn::Pool(conn))
5869
} else if !self.is_healthy() {
5970
Err(PoolError::UnhealthyPool)
6071
} else {
6172
Ok(DieselPooledConn::Pool(pool.get().map_err(PoolError::R2D2)?))
6273
}
63-
}
74+
}),
6475
DieselPool::Test(conn) => Ok(DieselPooledConn::Test(conn.lock())),
6576
}
6677
}
6778

6879
pub fn state(&self) -> PoolState {
6980
match self {
70-
DieselPool::Pool(pool) => {
81+
DieselPool::Pool { pool, .. } => {
7182
let state = pool.state();
7283
PoolState {
7384
connections: state.connections,
@@ -83,7 +94,7 @@ impl DieselPool {
8394

8495
pub fn wait_until_healthy(&self, timeout: Duration) -> Result<(), PoolError> {
8596
match self {
86-
DieselPool::Pool(pool) => match pool.get_timeout(timeout) {
97+
DieselPool::Pool { pool, .. } => match pool.get_timeout(timeout) {
8798
Ok(_) => Ok(()),
8899
Err(_) if !self.is_healthy() => Err(PoolError::UnhealthyPool),
89100
Err(err) => Err(PoolError::R2D2(err)),

src/metrics/instance.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
use crate::util::errors::AppResult;
2121
use crate::{app::App, db::DieselPool};
2222
use prometheus::{
23-
proto::MetricFamily, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
23+
proto::MetricFamily, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
2424
};
2525

2626
metrics! {
@@ -29,6 +29,8 @@ metrics! {
2929
database_idle_conns: IntGaugeVec["pool"],
3030
/// Number of used database connections in the pool
3131
database_used_conns: IntGaugeVec["pool"],
32+
/// Amount of time required to obtain a database connection
33+
pub database_time_to_obtain_connection: HistogramVec["pool"],
3234

3335
/// Number of requests processed by this instance
3436
pub requests_total: IntCounter,
@@ -44,6 +46,8 @@ metrics! {
4446
pub downloads_unconditional_redirects_total: IntCounter,
4547
/// Number of download requests with a non-canonical crate name.
4648
pub downloads_non_canonical_crate_name_total: IntCounter,
49+
/// How long it takes to execute the SELECT query in the download endpoint.
50+
pub downloads_select_query_execution_time: Histogram,
4751
/// Number of download requests that are not counted yet.
4852
downloads_not_counted_total: IntGauge,
4953
}

src/middleware/log_request.rs

Lines changed: 1 addition & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,8 @@ use conduit::{header, Host, RequestExt, Scheme, StatusCode};
88
use conduit_cookie::RequestSession;
99
use sentry::Level;
1010

11-
use std::cell::RefCell;
12-
use std::collections::HashMap;
1311
use std::fmt::{self, Display, Formatter};
14-
use std::rc::Rc;
15-
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
12+
use std::time::{SystemTime, UNIX_EPOCH};
1613

1714
const SLOW_REQUEST_THRESHOLD_MS: u64 = 1000;
1815

@@ -94,34 +91,6 @@ pub fn add_custom_metadata<V: Display>(req: &mut dyn RequestExt, key: &'static s
9491
}
9592
}
9693

97-
#[derive(Debug, Clone)]
98-
pub struct TimingRecorder {
99-
sections: Rc<RefCell<HashMap<&'static str, Duration>>>,
100-
}
101-
102-
impl TimingRecorder {
103-
pub fn new() -> Self {
104-
Self {
105-
sections: Rc::new(RefCell::new(HashMap::new())),
106-
}
107-
}
108-
109-
pub fn record<R>(&self, name: &'static str, f: impl FnOnce() -> R) -> R {
110-
let start = Instant::now();
111-
let res = f();
112-
self.sections
113-
.borrow_mut()
114-
.insert(name, Instant::now() - start);
115-
res
116-
}
117-
}
118-
119-
impl Default for TimingRecorder {
120-
fn default() -> Self {
121-
Self::new()
122-
}
123-
}
124-
12594
fn report_to_sentry(req: &dyn RequestExt, res: &AfterResult, response_time: u64) {
12695
let (message, level) = match res {
12796
Err(e) => (e.to_string(), Level::Error),
@@ -255,17 +224,6 @@ impl Display for RequestLine<'_> {
255224

256225
if self.response_time > SLOW_REQUEST_THRESHOLD_MS {
257226
line.add_marker("SLOW REQUEST")?;
258-
259-
if let Some(timings) = self.req.extensions().find::<TimingRecorder>() {
260-
for (section, duration) in timings.sections.borrow().iter() {
261-
line.add_quoted_field(
262-
format!("timing_{}", section),
263-
// Debug formatting rounds the duration to the most useful unit and adds
264-
// the unit suffix. For example: 1.20s, 10.00ms, 8.35ns
265-
format!("{:.2?}", duration),
266-
)?;
267-
}
268-
}
269227
}
270228

271229
Ok(())

0 commit comments

Comments
 (0)