Skip to content

Commit 1d3266a

Browse files
committed
Auto merge of #3857 - rust-lang:revert-3829-pa-db-pool-histogram, r=pietroalbini
Revert "Track used database conns with an histogram" Reverts #3829 This has been causing issues in production: ![image](https://user-images.githubusercontent.com/141300/130358224-673c5126-622d-4a92-bbcf-5fa95404533a.png)
2 parents 2988a76 + 3343521 commit 1d3266a

File tree

6 files changed

+48
-127
lines changed

6 files changed

+48
-127
lines changed

src/app.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,6 @@ impl App {
128128
instance_metrics
129129
.database_time_to_obtain_connection
130130
.with_label_values(&["primary"]),
131-
instance_metrics
132-
.database_used_conns_histogram
133-
.with_label_values(&["primary"]),
134131
)
135132
.unwrap()
136133
};
@@ -158,9 +155,6 @@ impl App {
158155
instance_metrics
159156
.database_time_to_obtain_connection
160157
.with_label_values(&["follower"]),
161-
instance_metrics
162-
.database_used_conns_histogram
163-
.with_label_values(&["follower"]),
164158
)
165159
.unwrap(),
166160
)

src/db.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ use crate::middleware::app::RequestApp;
1313
pub enum DieselPool {
1414
Pool {
1515
pool: r2d2::Pool<ConnectionManager<PgConnection>>,
16-
used_conns_metric: Histogram,
1716
time_to_obtain_connection_metric: Histogram,
1817
},
1918
Test(Arc<ReentrantMutex<PgConnection>>),
@@ -23,7 +22,6 @@ impl DieselPool {
2322
pub(crate) fn new(
2423
url: &str,
2524
config: r2d2::Builder<ConnectionManager<PgConnection>>,
26-
used_conns_metric: Histogram,
2725
time_to_obtain_connection_metric: Histogram,
2826
) -> Result<DieselPool, PoolError> {
2927
let manager = ConnectionManager::new(connection_url(url));
@@ -41,7 +39,6 @@ impl DieselPool {
4139
// automatically be marked as unhealthy and the rest of the application will adapt.
4240
let pool = DieselPool::Pool {
4341
pool: config.build_unchecked(manager),
44-
used_conns_metric,
4542
time_to_obtain_connection_metric,
4643
};
4744
match pool.wait_until_healthy(Duration::from_secs(5)) {
@@ -65,13 +62,8 @@ impl DieselPool {
6562
match self {
6663
DieselPool::Pool {
6764
pool,
68-
used_conns_metric,
6965
time_to_obtain_connection_metric,
7066
} => time_to_obtain_connection_metric.observe_closure_duration(|| {
71-
// Record the number of used connections before obtaining the current one.
72-
let state = pool.state();
73-
used_conns_metric.observe((state.connections - state.idle_connections) as f64);
74-
7567
if let Some(conn) = pool.try_get() {
7668
Ok(DieselPooledConn::Pool(conn))
7769
} else if !self.is_healthy() {

src/metrics/histogram.rs

Lines changed: 0 additions & 103 deletions
This file was deleted.

src/metrics/instance.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717
//! As a rule of thumb, if the metric requires a database query to be updated it's probably a
1818
//! service-level metric, and you should add it to `src/metrics/service.rs` instead.
1919
20-
use crate::metrics::histogram::{DatabasePoolBuckets, Histogram, HistogramVec, TimingBuckets};
2120
use crate::util::errors::AppResult;
2221
use crate::{app::App, db::DieselPool};
23-
use prometheus::{proto::MetricFamily, IntCounter, IntCounterVec, IntGauge, IntGaugeVec};
22+
use prometheus::{
23+
proto::MetricFamily, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
24+
};
2425

2526
metrics! {
2627
pub struct InstanceMetrics {
@@ -29,17 +30,15 @@ metrics! {
2930
/// Number of used database connections in the pool
3031
database_used_conns: IntGaugeVec["pool"],
3132
/// Amount of time required to obtain a database connection
32-
pub database_time_to_obtain_connection: HistogramVec<TimingBuckets>["pool"],
33-
/// Number of used database connections in the pool, as histogram
34-
pub database_used_conns_histogram: HistogramVec<DatabasePoolBuckets>["pool"],
33+
pub database_time_to_obtain_connection: HistogramVec["pool"],
3534

3635
/// Number of requests processed by this instance
3736
pub requests_total: IntCounter,
3837
/// Number of requests currently being processed
3938
pub requests_in_flight: IntGauge,
4039

4140
/// Response times of our endpoints
42-
pub response_times: HistogramVec<TimingBuckets>["endpoint"],
41+
pub response_times: HistogramVec["endpoint"],
4342
/// Nmber of responses per status code
4443
pub responses_by_status_code_total: IntCounterVec["status"],
4544

@@ -48,7 +47,7 @@ metrics! {
4847
/// Number of download requests with a non-canonical crate name.
4948
pub downloads_non_canonical_crate_name_total: IntCounter,
5049
/// How long it takes to execute the SELECT query in the download endpoint.
51-
pub downloads_select_query_execution_time: Histogram<TimingBuckets>,
50+
pub downloads_select_query_execution_time: Histogram,
5251
/// Number of download requests that are not counted yet.
5352
downloads_not_counted_total: IntGauge,
5453
}

src/metrics/macros.rs

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,19 @@
1-
use prometheus::Opts;
1+
use prometheus::{Histogram, HistogramOpts, HistogramVec, Opts};
2+
3+
/// Prometheus's histograms work by dividing datapoints in buckets, with each bucket containing
4+
/// the count of datapoints equal or greater to the bucket value.
5+
///
6+
/// The buckets used by crates.io are geared towards measuring the response time of our requests,
7+
/// going from 0.5ms to 100ms with a higher resolution and from 100ms to 5 seconds with a slightly
8+
/// lower resolution. This allows us to properly measure download requests (which take around 1ms)
9+
/// and other requests (our 95h is around 10-20ms).
10+
///
11+
/// Histogram buckets are not an exact science, so feel free to tweak the buckets if you see that
12+
/// the histograms are not really accurate. Just avoid adding too many buckets as that increases
13+
/// the number of exported metric series.
14+
const HISTOGRAM_BUCKETS: &[f64] = &[
15+
0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.5, 1.0, 5.0,
16+
];
217

318
pub(super) trait MetricFromOpts: Sized {
419
fn from_opts(opts: Opts) -> Result<Self, prometheus::Error>;
@@ -90,4 +105,29 @@ load_metric_type!(GaugeVec as vec);
90105
load_metric_type!(IntGauge as single);
91106
load_metric_type!(IntGaugeVec as vec);
92107

93-
// Histograms are defined in histogram.rs
108+
// Use a custom implementation for histograms to customize the buckets.
109+
110+
impl MetricFromOpts for Histogram {
111+
fn from_opts(opts: Opts) -> Result<Self, prometheus::Error> {
112+
Histogram::with_opts(HistogramOpts {
113+
common_opts: opts,
114+
buckets: HISTOGRAM_BUCKETS.to_vec(),
115+
})
116+
}
117+
}
118+
119+
impl MetricFromOpts for HistogramVec {
120+
fn from_opts(opts: Opts) -> Result<Self, prometheus::Error> {
121+
HistogramVec::new(
122+
HistogramOpts {
123+
common_opts: opts.clone(),
124+
buckets: HISTOGRAM_BUCKETS.to_vec(),
125+
},
126+
opts.variable_labels
127+
.iter()
128+
.map(|s| s.as_str())
129+
.collect::<Vec<_>>()
130+
.as_slice(),
131+
)
132+
}
133+
}

src/metrics/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ pub use self::service::ServiceMetrics;
55
#[macro_use]
66
mod macros;
77

8-
mod histogram;
98
mod instance;
109
mod log_encoder;
1110
mod service;

0 commit comments

Comments
 (0)