From 63ccf69796dada13f04aada64a49787e5c2b87b6 Mon Sep 17 00:00:00 2001 From: Gil Mizrahi Date: Tue, 31 Oct 2023 15:36:55 +0200 Subject: [PATCH] add error metrics --- crates/connectors/ndc-postgres/src/explain.rs | 23 +++++- crates/connectors/ndc-postgres/src/query.rs | 23 +++++- .../query-engine/execution/src/execution.rs | 26 +++++- crates/query-engine/execution/src/metrics.rs | 79 +++++++++++++++++++ .../src/translation/query/error.rs | 14 ++++ 5 files changed, 157 insertions(+), 8 deletions(-) diff --git a/crates/connectors/ndc-postgres/src/explain.rs b/crates/connectors/ndc-postgres/src/explain.rs index 94eacd060..168677a98 100644 --- a/crates/connectors/ndc-postgres/src/explain.rs +++ b/crates/connectors/ndc-postgres/src/explain.rs @@ -44,10 +44,21 @@ pub async fn explain<'a>( .map_err(|err| match err { execution::Error::Query(err) => { tracing::error!("{}", err); - connector::ExplainError::Other(err.into()) + // log error metric + match &err { + execution::QueryError::VariableNotFound(_) => { + state.metrics.error_metrics.record_invalid_request() + } + execution::QueryError::NotSupported(_) => { + state.metrics.error_metrics.record_unsupported_feature() + } + } + + connector::ExplainError::Other(err.to_string().into()) } execution::Error::DB(err) => { tracing::error!("{}", err); + state.metrics.error_metrics.record_database_error(); connector::ExplainError::Other(err.to_string().into()) } })?; @@ -75,10 +86,18 @@ fn plan_query( translation::query::translate(configuration.metadata, query_request).map_err(|err| { tracing::error!("{}", err); match err { + translation::query::error::Error::CapabilityNotSupported(_) => { + state.metrics.error_metrics.record_unsupported_capability(); + connector::ExplainError::UnsupportedOperation(err.to_string()) + } translation::query::error::Error::NotSupported(_) => { + state.metrics.error_metrics.record_unsupported_feature(); connector::ExplainError::UnsupportedOperation(err.to_string()) } - _ => connector::ExplainError::InvalidRequest(err.to_string()), + _ => { + state.metrics.error_metrics.record_invalid_request(); + connector::ExplainError::InvalidRequest(err.to_string()) + } } }); timer.complete_with(result) diff --git a/crates/connectors/ndc-postgres/src/query.rs b/crates/connectors/ndc-postgres/src/query.rs index 9d296d55d..a3f52cade 100644 --- a/crates/connectors/ndc-postgres/src/query.rs +++ b/crates/connectors/ndc-postgres/src/query.rs @@ -59,11 +59,20 @@ fn plan_query( let result = translation::query::translate(configuration.metadata, query_request).map_err(|err| { tracing::error!("{}", err); + // log metrics match err { + translation::query::error::Error::CapabilityNotSupported(_) => { + state.metrics.error_metrics.record_unsupported_capability(); + connector::QueryError::UnsupportedOperation(err.to_string()) + } translation::query::error::Error::NotSupported(_) => { + state.metrics.error_metrics.record_unsupported_feature(); connector::QueryError::UnsupportedOperation(err.to_string()) } - _ => connector::QueryError::InvalidRequest(err.to_string()), + _ => { + state.metrics.error_metrics.record_invalid_request(); + connector::QueryError::InvalidRequest(err.to_string()) + } } }); timer.complete_with(result) @@ -79,10 +88,20 @@ async fn execute_query( .map_err(|err| match err { execution::Error::Query(err) => { tracing::error!("{}", err); - connector::QueryError::Other(err.into()) + // log error metric + match &err { + execution::QueryError::VariableNotFound(_) => { + state.metrics.error_metrics.record_invalid_request() + } + execution::QueryError::NotSupported(_) => { + state.metrics.error_metrics.record_unsupported_feature() + } + } + connector::QueryError::Other(err.to_string().into()) } execution::Error::DB(err) => { tracing::error!("{}", err); + state.metrics.error_metrics.record_database_error(); connector::QueryError::Other(err.to_string().into()) } }) diff --git a/crates/query-engine/execution/src/execution.rs b/crates/query-engine/execution/src/execution.rs index 0fe04efc9..96902023c 100644 --- a/crates/query-engine/execution/src/execution.rs +++ b/crates/query-engine/execution/src/execution.rs @@ -212,13 +212,13 @@ async fn build_query_with_params<'a>( serde_json::Value::Bool(b) => Ok(sqlx_query.bind(b)), serde_json::Value::Null => Ok(sqlx_query.bind::>(None)), serde_json::Value::Array(_array) => Err(Error::Query( - "array variable not currently supported".to_string(), + QueryError::NotSupported("array variables".to_string()), )), serde_json::Value::Object(_object) => Err(Error::Query( - "object variable not currently supported".to_string(), + QueryError::NotSupported("object variables".to_string()), )), }, - None => Err(Error::Query(format!("Variable not found '{}'", var))), + None => Err(Error::Query(QueryError::VariableNotFound(var.to_string()))), }, })?; @@ -226,10 +226,28 @@ async fn build_query_with_params<'a>( } pub enum Error { - Query(String), + Query(QueryError), DB(sqlx::Error), } +pub enum QueryError { + VariableNotFound(String), + NotSupported(String), +} + +impl std::fmt::Display for QueryError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + QueryError::VariableNotFound(thing) => { + write!(f, "Variable '{}' not found.", thing) + } + QueryError::NotSupported(thing) => { + write!(f, "{} are not supported.", thing) + } + } + } +} + impl From for Error { fn from(err: sqlx::Error) -> Error { Error::DB(err) diff --git a/crates/query-engine/execution/src/metrics.rs b/crates/query-engine/execution/src/metrics.rs index b86d39279..0102034f9 100644 --- a/crates/query-engine/execution/src/metrics.rs +++ b/crates/query-engine/execution/src/metrics.rs @@ -21,6 +21,7 @@ pub struct Metrics { pool_acquire_timeout: Gauge, pool_max_lifetime: Gauge, pool_idle_timeout: Gauge, + pub error_metrics: ErrorMetrics, } impl Metrics { @@ -110,6 +111,8 @@ impl Metrics { "Get the maximum lifetime of individual connections, in seconds.", )?; + let error_metrics = ErrorMetrics::initialize(metrics_registry)?; + Ok(Self { query_total, explain_total, @@ -125,6 +128,7 @@ impl Metrics { pool_acquire_timeout, pool_idle_timeout, pool_max_lifetime, + error_metrics, }) } @@ -284,3 +288,78 @@ impl std::fmt::Display for Error { } impl std::error::Error for Error {} + +/// A collection of metrics indicating errors. +#[derive(Debug, Clone)] +pub struct ErrorMetrics { + /// the connector received an invalid request. + invalid_request_total: IntCounter, + /// the connector received a request using capabilities it does not support. + unsupported_capability_total: IntCounter, + /// the connector could not fulfill a request because it does not support + /// certain features (which are not described as capabilities). + unsupported_feature_total: IntCounter, + /// the connector had an internal error. + connector_error_total: IntCounter, + /// the database emmited an error. + database_error_total: IntCounter, +} + +impl ErrorMetrics { + /// Set up counters and gauges used to produce Prometheus metrics + pub fn initialize(metrics_registry: &mut prometheus::Registry) -> Result { + let invalid_request_total = add_int_counter_metric( + metrics_registry, + "ndc_postgres_error_invalid_request_total_count", + "Total number of invalid requests encountered.", + )?; + + let unsupported_capability_total = add_int_counter_metric( + metrics_registry, + "ndc_postgres_error_unsupported_capability_total_count", + "Total number of invalid requests with unsupported capabilities encountered.", + )?; + + let unsupported_feature_total = add_int_counter_metric( + metrics_registry, + "ndc_postgres_error_unsupported_capabilities_total_count", + "Total number of invalid requests with unsupported capabilities encountered.", + )?; + + let connector_error_total = add_int_counter_metric( + metrics_registry, + "ndc_postgres_error_connector_error_total_count", + "Total number of requests failed due to an internal conenctor error.", + )?; + + let database_error_total = add_int_counter_metric( + metrics_registry, + "ndc_postgres_error_database_error_total_count", + "Total number of requests failed due to a database error.", + )?; + + Ok(ErrorMetrics { + invalid_request_total, + unsupported_capability_total, + unsupported_feature_total, + connector_error_total, + database_error_total, + }) + } + + pub fn record_invalid_request(&self) { + self.invalid_request_total.inc(); + } + pub fn record_unsupported_capability(&self) { + self.unsupported_capability_total.inc(); + } + pub fn record_unsupported_feature(&self) { + self.unsupported_feature_total.inc(); + } + pub fn record_connector_error(&self) { + self.connector_error_total.inc(); + } + pub fn record_database_error(&self) { + self.database_error_total.inc(); + } +} diff --git a/crates/query-engine/translation/src/translation/query/error.rs b/crates/query-engine/translation/src/translation/query/error.rs index f3bf76deb..3f24d5394 100644 --- a/crates/query-engine/translation/src/translation/query/error.rs +++ b/crates/query-engine/translation/src/translation/query/error.rs @@ -17,9 +17,20 @@ pub enum Error { EmptyPathForStarCountAggregate, NoFields, TypeMismatch(serde_json::Value, database::ScalarType), + CapabilityNotSupported(UnsupportedCapabilities), NotSupported(String), } +/// Capabilities we don't currently support. +#[derive(Debug)] +pub enum UnsupportedCapabilities {} + +impl std::fmt::Display for UnsupportedCapabilities { + fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result { + todo!() + } +} + /// Display errors. impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { @@ -60,6 +71,9 @@ impl std::fmt::Display for Error { Error::TypeMismatch(value, typ) => { write!(f, "Value '{:?}' is not of type '{:?}'.", value, typ) } + Error::CapabilityNotSupported(thing) => { + write!(f, "Queries containing {} are not supported.", thing) + } Error::NotSupported(thing) => { write!(f, "Queries containing {} are not supported.", thing) }