From d989a59fac9981387eecdd51ef939e842f9a190f Mon Sep 17 00:00:00 2001 From: Dawid Pawlik <501149991dp@gmail.com> Date: Wed, 11 Dec 2024 11:50:44 +0100 Subject: [PATCH] metrics: add latency histogram statistics I've added histogram metrics used in cpp-rust-driver. The snapshot of histogram statistics is taken under concurrency precautions using lock-free histogram features. I've adjusted the docs book, adding an example of taking the snapshot and accessing its values. --- docs/source/metrics/metrics.md | 15 +++ examples/basic.rs | 12 ++ .../histogram/lock_free_histogram.rs | 106 ++++++++++++++++++ scylla/src/transport/histogram/mod.rs | 1 + scylla/src/transport/metrics.rs | 10 +- 5 files changed, 143 insertions(+), 1 deletion(-) diff --git a/docs/source/metrics/metrics.md b/docs/source/metrics/metrics.md index ce52615383..0d50d7be73 100644 --- a/docs/source/metrics/metrics.md +++ b/docs/source/metrics/metrics.md @@ -1,5 +1,7 @@ # Driver metrics +This feature is available only under the crate feature `metrics`. + During operation the driver collects various metrics. 
They can be accessed at any moment using `Session::get_metrics()` @@ -11,6 +13,7 @@ They can be accessed at any moment using `Session::get_metrics()` * Total number of paged queries * Number of errors during paged queries * Number of retries +* Latency histogram statistics (min, max, mean, standard deviation, percentiles) ### Example ```rust @@ -29,6 +32,18 @@ println!( "99.9 latency percentile: {}", metrics.get_latency_percentile_ms(99.9).unwrap() ); + +let snapshot = metrics.get_snapshot().unwrap(); +println!("Min: {}", snapshot.min); +println!("Max: {}", snapshot.max); +println!("Mean: {}", snapshot.mean); +println!("Standard deviation: {}", snapshot.stddev); +println!("Median: {}", snapshot.median); +println!("75th percentile: {}", snapshot.percentile_75); +println!("90th percentile: {}", snapshot.percentile_90); +println!("95th percentile: {}", snapshot.percentile_95); +println!("99th percentile: {}", snapshot.percentile_99); +println!("99.9th percentile: {}", snapshot.percentile_99_9); # Ok(()) # } ``` \ No newline at end of file diff --git a/examples/basic.rs b/examples/basic.rs index c4fe10b8b3..e658056220 100644 --- a/examples/basic.rs +++ b/examples/basic.rs @@ -100,6 +100,18 @@ async fn main() -> Result<()> { metrics.get_latency_percentile_ms(99.9).unwrap() ); + let snapshot = metrics.get_snapshot().unwrap(); + println!("Min: {}", snapshot.min); + println!("Max: {}", snapshot.max); + println!("Mean: {}", snapshot.mean); + println!("Standard deviation: {}", snapshot.stddev); + println!("Median: {}", snapshot.median); + println!("75th percentile: {}", snapshot.percentile_75); + println!("90th percentile: {}", snapshot.percentile_90); + println!("95th percentile: {}", snapshot.percentile_95); + println!("99th percentile: {}", snapshot.percentile_99); + println!("99.9th percentile: {}", snapshot.percentile_99_9); + println!("Ok."); Ok(()) diff --git a/scylla/src/transport/histogram/lock_free_histogram.rs 
b/scylla/src/transport/histogram/lock_free_histogram.rs
index 70c6ec8be0..75aa61370c 100644
--- a/scylla/src/transport/histogram/lock_free_histogram.rs
+++ b/scylla/src/transport/histogram/lock_free_histogram.rs
@@ -31,6 +31,23 @@ pub struct Histogram {
     config: Config,
 }
 
+/// Snapshot is a structure that contains histogram statistics such as
+/// min, max, mean, standard deviation, median, and most common percentiles
+/// collected in a certain moment.
+#[derive(Debug)]
+pub struct Snapshot {
+    pub min: u64,
+    pub max: u64,
+    pub mean: u64,
+    pub stddev: u64,
+    pub median: u64,
+    pub percentile_75: u64,
+    pub percentile_90: u64,
+    pub percentile_95: u64,
+    pub percentile_99: u64,
+    pub percentile_99_9: u64,
+}
+
 impl Histogram {
     pub fn new() -> Self {
         let grouping_power = 7;
@@ -109,6 +126,102 @@ impl Histogram {
         }
     }
 
+    /// Returns a closure computing a [`Snapshot`] of the statistics stored in `buckets`.
+    ///
+    /// `min`, `mean`, `stddev`, `median` and the percentiles are built from bucket lower
+    /// bounds; `max` is the largest upper bound among the non-empty buckets.
+    /// When no value has been recorded, every field of the snapshot is zero.
+    pub fn snapshot() -> impl FnOnce(&[AtomicU64], &Config) -> Result {
+        |buckets, config| {
+            // Fractions of the total count at which the median and the 75th, 90th,
+            // 95th, 99th and 99.9th percentiles are read, in `Snapshot` field order.
+            const QUANTILES: [f64; 6] = [0.5, 0.75, 0.9, 0.95, 0.99, 0.999];
+
+            let total_count = Histogram::get_total_count(buckets);
+            if total_count == 0 {
+                // Dividing by the total count below would panic on an empty histogram.
+                return Ok(Snapshot {
+                    min: 0,
+                    max: 0,
+                    mean: 0,
+                    stddev: 0,
+                    median: 0,
+                    percentile_75: 0,
+                    percentile_90: 0,
+                    percentile_95: 0,
+                    percentile_99: 0,
+                    percentile_99_9: 0,
+                });
+            }
+
+            // Number of samples that must be accumulated to reach each quantile.
+            let thresholds = QUANTILES.map(|q| (q * total_count as f64).ceil() as u128);
+            let mut percentiles = [0u64; 6];
+
+            let mut min = u64::MAX;
+            let mut max = 0;
+            let mut weighted_sum = 0;
+            let mut pref_sum = 0;
+
+            for (i, bucket) in buckets.iter().enumerate() {
+                let count = bucket.load(ORDER_TYPE) as u128;
+                if count == 0 {
+                    continue;
+                }
+
+                let lower_bound = config.index_to_lower_bound(i);
+                let upper_bound = config.index_to_upper_bound(i);
+
+                if lower_bound < min {
+                    min = lower_bound;
+                }
+                if upper_bound > max {
+                    max = upper_bound;
+                }
+
+                weighted_sum += count * lower_bound as u128;
+
+                // A quantile lands in this bucket when the running count crosses its threshold.
+                let next_pref_sum = pref_sum + count;
+                for (value, &threshold) in percentiles.iter_mut().zip(&thresholds) {
+                    if pref_sum < threshold && next_pref_sum >= threshold {
+                        *value = lower_bound;
+                    }
+                }
+
+                pref_sum = next_pref_sum;
+            }
+
+            let mean = (weighted_sum / total_count) as u64;
+            let mut variance_sum = 0;
+            for (i, bucket) in buckets.iter().enumerate() {
+                let count = bucket.load(ORDER_TYPE) as u128;
+                if count == 0 {
+                    continue;
+                }
+
+                // `abs_diff` keeps buckets below the mean from underflowing the subtraction.
+                let deviation = config.index_to_lower_bound(i).abs_diff(mean) as u128;
+                variance_sum += count * deviation.pow(2);
+            }
+            let variance = variance_sum / total_count;
+            let stddev = (variance as f64).sqrt() as u64;
+
+            Ok(Snapshot {
+                min,
+                max,
+                mean,
+                stddev,
+                median: percentiles[0],
+                percentile_75: percentiles[1],
+                percentile_90: percentiles[2],
+                percentile_95: percentiles[3],
+                percentile_99: percentiles[4],
+                percentile_99_9: percentiles[5],
+            })
+        }
+    }
+
     pub fn get_total_count(buckets: &[AtomicU64]) -> u128 {
         buckets.iter().map(|v| v.load(ORDER_TYPE) as u128).sum()
     }
diff --git a/scylla/src/transport/histogram/mod.rs b/scylla/src/transport/histogram/mod.rs
index b02ade466d..2a3025f284 100644
--- a/scylla/src/transport/histogram/mod.rs
+++ b/scylla/src/transport/histogram/mod.rs
@@ -3,3 +3,4 @@ mod lock_free_histogram;
 
 pub use config::Config;
 pub use lock_free_histogram::Histogram;
+pub use lock_free_histogram::Snapshot;
diff --git a/scylla/src/transport/metrics.rs b/scylla/src/transport/metrics.rs
index 832496b28f..20919004be 100644
--- a/scylla/src/transport/metrics.rs
+++ b/scylla/src/transport/metrics.rs
@@ -1,4 +1,4 @@
-use crate::transport::histogram::Histogram;
+use crate::transport::histogram::{Histogram, Snapshot};
 use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::Arc;
 
@@ -97,6 +97,17 @@ impl Metrics {
         Ok(result)
     }
 
+    /// Returns a snapshot of the latency histogram statistics taken at the moment of the call.
+    ///
+    /// Available fields: `min`, `max`, `mean`, `stddev`, `median`, `percentile_75`,
+    /// `percentile_90`, `percentile_95`, `percentile_99`, `percentile_99_9`.
+    ///
+    /// An error reported by the histogram operation is propagated to the caller.
+    pub fn get_snapshot(&self) -> Result {
+        let snapshot = self.histogram.log_operation(Histogram::snapshot())?;
+        Ok(snapshot)
+    }
+
     /// Returns counter for errors occurred in nonpaged queries
     pub fn get_errors_num(&self) -> u64 {
         self.errors_num.load(ORDER_TYPE)