Skip to content

Commit

Permalink
feat: don't crash relayer due to flaky RPCs (#5115)
Browse files Browse the repository at this point in the history
### Description
- instead of crashing relayer if any RPC calls fail during
`build_mailboxes` and `build_validator_announces`, we just log the error
and metric
 - relayer will continue running without those chains

### Drive-by changes

<!--
Are there any minor or drive-by changes also included?
-->

### Related issues
Fixes #4887

### Backward compatibility

Are these changes backward compatible? Are there any infrastructure
implications, e.g. changes that would prohibit deploying older commits
using this infra tooling?

Yes

### Testing

Added unittests to test ensure functions run correctly and log errors in
metrics

---------

Co-authored-by: Danil Nemirovsky <[email protected]>
Co-authored-by: Trevor Porter <[email protected]>
  • Loading branch information
3 people authored Jan 15, 2025
1 parent af0dd49 commit c47511b
Show file tree
Hide file tree
Showing 6 changed files with 295 additions and 22 deletions.
2 changes: 2 additions & 0 deletions rust/main/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions rust/main/agents/relayer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,11 @@ hyperlane-ethereum = { path = "../../chains/hyperlane-ethereum" }
once_cell.workspace = true
mockall.workspace = true
tokio-test.workspace = true
tracing-test.workspace = true
hyperlane-test = { path = "../../hyperlane-test" }
hyperlane-base = { path = "../../hyperlane-base", features = ["test-utils"] }
hyperlane-core = { path = "../../hyperlane-core", features = ["agent", "async", "test-utils"] }
ethers-prometheus = { path = "../../ethers-prometheus", features = ["serde"] }

[features]
default = ["color-eyre", "oneline-errors"]
Expand Down
297 changes: 283 additions & 14 deletions rust/main/agents/relayer/src/relayer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ use hyperlane_base::{
};
use hyperlane_core::{
rpc_clients::call_and_retry_n_times, ChainCommunicationError, ContractSyncCursor,
HyperlaneDomain, HyperlaneMessage, InterchainGasPayment, MerkleTreeInsertion, QueueOperation,
H512, U256,
HyperlaneDomain, HyperlaneMessage, InterchainGasPayment, Mailbox, MerkleTreeInsertion,
QueueOperation, ValidatorAnnounce, H512, U256,
};
use tokio::{
sync::{
Expand Down Expand Up @@ -135,12 +135,10 @@ impl BaseAgent for Relayer {
.map(|origin| (origin.clone(), HyperlaneRocksDB::new(origin, db.clone())))
.collect::<HashMap<_, _>>();

let mailboxes = settings
.build_mailboxes(settings.destination_chains.iter(), &core_metrics)
.await?;
let validator_announces = settings
.build_validator_announces(settings.origin_chains.iter(), &core_metrics)
.await?;
let mailboxes = Self::build_mailboxes(&settings, &core_metrics, &chain_metrics).await;

let validator_announces =
Self::build_validator_announces(&settings, &core_metrics, &chain_metrics).await;

let contract_sync_metrics = Arc::new(ContractSyncMetrics::new(&core_metrics));

Expand Down Expand Up @@ -236,7 +234,9 @@ impl BaseAgent for Relayer {

let mut msg_ctxs = HashMap::new();
let mut destination_chains = HashMap::new();
for destination in &settings.destination_chains {

// only iterate through destination chains that were successfully instantiated
for (destination, dest_mailbox) in mailboxes.iter() {
let destination_chain_setup = core.settings.chain_setup(destination).unwrap().clone();
destination_chains.insert(destination.clone(), destination_chain_setup.clone());
let transaction_gas_limit: Option<U256> =
Expand All @@ -246,18 +246,19 @@ impl BaseAgent for Relayer {
transaction_gas_limit
};

for origin in &settings.origin_chains {
// only iterate through origin chains that were successfully instantiated
for (origin, validator_announce) in validator_announces.iter() {
let db = dbs.get(origin).unwrap().clone();
let metadata_builder = BaseMetadataBuilder::new(
origin.clone(),
destination_chain_setup.clone(),
prover_syncs[origin].clone(),
validator_announces[origin].clone(),
validator_announce.clone(),
settings.allow_local_checkpoint_syncers,
core.metrics.clone(),
db,
IsmAwareAppContextClassifier::new(
mailboxes[destination].clone(),
dest_mailbox.clone(),
settings.metric_app_contexts.clone(),
),
);
Expand All @@ -268,7 +269,7 @@ impl BaseAgent for Relayer {
destination: destination.id(),
},
Arc::new(MessageContext {
destination_mailbox: mailboxes[destination].clone(),
destination_mailbox: dest_mailbox.clone(),
origin_db: dbs.get(origin).unwrap().clone(),
metadata_builder: Arc::new(metadata_builder),
origin_gas_payment_enforcer: gas_payment_enforcers[origin].clone(),
Expand Down Expand Up @@ -620,7 +621,275 @@ impl Relayer {
}))
.instrument(span)
}

/// Helper function to build and return a hashmap of mailboxes.
/// Any chains that fail to build mailbox will not be included
/// in the hashmap. Errors will be logged and chain metrics
/// will be updated for chains that fail to build mailbox.
pub async fn build_mailboxes(
settings: &RelayerSettings,
core_metrics: &CoreMetrics,
chain_metrics: &ChainMetrics,
) -> HashMap<HyperlaneDomain, Arc<dyn Mailbox>> {
settings
.build_mailboxes(settings.destination_chains.iter(), core_metrics)
.await
.into_iter()
.filter_map(|(origin, mailbox_res)| match mailbox_res {
Ok(mailbox) => Some((origin, mailbox)),
Err(err) => {
error!(?err, origin=?origin, "Critical error when building mailbox");
chain_metrics.set_critical_error(origin.name(), true);
None
}
})
.collect()
}

/// Helper function to build and return a hashmap of validator announces.
/// Any chains that fail to build validator announce will not be included
/// in the hashmap. Errors will be logged and chain metrics
/// will be updated for chains that fail to build validator announce.
pub async fn build_validator_announces(
settings: &RelayerSettings,
core_metrics: &CoreMetrics,
chain_metrics: &ChainMetrics,
) -> HashMap<HyperlaneDomain, Arc<dyn ValidatorAnnounce>> {
settings
.build_validator_announces(settings.origin_chains.iter(), core_metrics)
.await
.into_iter()
.filter_map(|(origin, mailbox_res)| match mailbox_res {
Ok(mailbox) => Some((origin, mailbox)),
Err(err) => {
error!(?err, origin=?origin, "Critical error when building validator announce");
chain_metrics.set_critical_error(origin.name(), true);
None
}
})
.collect()
}
}

#[cfg(test)]
mod test {}
mod test {
use std::{
collections::{HashMap, HashSet},
path::PathBuf,
};

use crate::settings::{matching_list::MatchingList, RelayerSettings};
use ethers::utils::hex;
use ethers_prometheus::middleware::PrometheusMiddlewareConf;
use hyperlane_base::{
settings::{
ChainConf, ChainConnectionConf, CoreContractAddresses, IndexSettings, Settings,
TracingConfig,
},
ChainMetrics, CoreMetrics, BLOCK_HEIGHT_HELP, BLOCK_HEIGHT_LABELS, CRITICAL_ERROR_HELP,
CRITICAL_ERROR_LABELS,
};
use hyperlane_core::{
config::OperationBatchConfig, HyperlaneDomain, IndexMode, KnownHyperlaneDomain,
ReorgPeriod, H256,
};
use hyperlane_ethereum as h_eth;
use prometheus::{opts, IntGaugeVec, Registry};
use reqwest::Url;

use super::Relayer;

/// Builds a test RelayerSetting
fn generate_test_relayer_settings() -> RelayerSettings {
let chains = [(
"arbitrum".to_string(),
ChainConf {
domain: HyperlaneDomain::Known(KnownHyperlaneDomain::Arbitrum),
signer: None,
reorg_period: ReorgPeriod::None,
addresses: CoreContractAddresses {
mailbox: H256::from_slice(
hex::decode(
"000000000000000000000000598facE78a4302f11E3de0bee1894Da0b2Cb71F8",
)
.unwrap()
.as_slice(),
),
interchain_gas_paymaster: H256::from_slice(
hex::decode(
"000000000000000000000000c756cFc1b7d0d4646589EDf10eD54b201237F5e8",
)
.unwrap()
.as_slice(),
),
validator_announce: H256::from_slice(
hex::decode(
"0000000000000000000000001b33611fCc073aB0737011d5512EF673Bff74962",
)
.unwrap()
.as_slice(),
),
merkle_tree_hook: H256::from_slice(
hex::decode(
"000000000000000000000000AD34A66Bf6dB18E858F6B686557075568c6E031C",
)
.unwrap()
.as_slice(),
),
},
connection: ChainConnectionConf::Ethereum(h_eth::ConnectionConf {
rpc_connection: h_eth::RpcConnectionConf::Http {
url: Url::parse("https://sepolia-rollup.arbitrum.io/rpc").unwrap(),
},
transaction_overrides: h_eth::TransactionOverrides {
gas_price: None,
gas_limit: None,
max_fee_per_gas: None,
max_priority_fee_per_gas: None,
},
operation_batch: OperationBatchConfig {
batch_contract_address: None,
max_batch_size: 1,
},
}),
metrics_conf: PrometheusMiddlewareConf {
contracts: HashMap::new(),
chain: None,
},
index: IndexSettings {
from: 0,
chunk_size: 1,
mode: IndexMode::Block,
},
},
)];

RelayerSettings {
base: Settings {
chains: chains.into_iter().collect(),
metrics_port: 5000,
tracing: TracingConfig::default(),
},
db: PathBuf::new(),
origin_chains: [
HyperlaneDomain::Known(KnownHyperlaneDomain::Arbitrum),
HyperlaneDomain::Known(KnownHyperlaneDomain::Ethereum),
HyperlaneDomain::Known(KnownHyperlaneDomain::Optimism),
]
.into_iter()
.collect(),
destination_chains: [
HyperlaneDomain::Known(KnownHyperlaneDomain::Arbitrum),
HyperlaneDomain::Known(KnownHyperlaneDomain::Ethereum),
HyperlaneDomain::Known(KnownHyperlaneDomain::Optimism),
]
.into_iter()
.collect(),
gas_payment_enforcement: Vec::new(),
whitelist: MatchingList::default(),
blacklist: MatchingList::default(),
address_blacklist: Vec::new(),
transaction_gas_limit: None,
skip_transaction_gas_limit_for: HashSet::new(),
allow_local_checkpoint_syncers: true,
metric_app_contexts: Vec::new(),
}
}

#[tokio::test]
#[tracing_test::traced_test]
async fn test_failed_build_mailboxes() {
let settings = generate_test_relayer_settings();

let registry = Registry::new();
let core_metrics = CoreMetrics::new("relayer", 4000, registry).unwrap();
let chain_metrics = ChainMetrics {
block_height: IntGaugeVec::new(
opts!("block_height", BLOCK_HEIGHT_HELP),
BLOCK_HEIGHT_LABELS,
)
.unwrap(),
gas_price: None,
critical_error: IntGaugeVec::new(
opts!("critical_error", CRITICAL_ERROR_HELP),
CRITICAL_ERROR_LABELS,
)
.unwrap(),
};

let mailboxes = Relayer::build_mailboxes(&settings, &core_metrics, &chain_metrics).await;

assert_eq!(mailboxes.len(), 1);
assert!(mailboxes.contains_key(&HyperlaneDomain::Known(KnownHyperlaneDomain::Arbitrum)));

// Arbitrum chain should not have any errors because it's ChainConf exists
let metric = chain_metrics
.critical_error
.get_metric_with_label_values(&["arbitrum"])
.unwrap();
assert_eq!(metric.get(), 0);

// Ethereum chain should error because it is missing ChainConf
let metric = chain_metrics
.critical_error
.get_metric_with_label_values(&["ethereum"])
.unwrap();
assert_eq!(metric.get(), 1);

// Optimism chain should error because it is missing ChainConf
let metric = chain_metrics
.critical_error
.get_metric_with_label_values(&["optimism"])
.unwrap();
assert_eq!(metric.get(), 1);
}

#[tokio::test]
#[tracing_test::traced_test]
async fn test_failed_build_validator_announces() {
let settings = generate_test_relayer_settings();

let registry = Registry::new();
let core_metrics = CoreMetrics::new("relayer", 4000, registry).unwrap();
let chain_metrics = ChainMetrics {
block_height: IntGaugeVec::new(
opts!("block_height", BLOCK_HEIGHT_HELP),
BLOCK_HEIGHT_LABELS,
)
.unwrap(),
gas_price: None,
critical_error: IntGaugeVec::new(
opts!("critical_error", CRITICAL_ERROR_HELP),
CRITICAL_ERROR_LABELS,
)
.unwrap(),
};

let mailboxes =
Relayer::build_validator_announces(&settings, &core_metrics, &chain_metrics).await;

assert_eq!(mailboxes.len(), 1);
assert!(mailboxes.contains_key(&HyperlaneDomain::Known(KnownHyperlaneDomain::Arbitrum)));

// Arbitrum chain should not have any errors because it's ChainConf exists
let metric = chain_metrics
.critical_error
.get_metric_with_label_values(&["arbitrum"])
.unwrap();
assert_eq!(metric.get(), 0);

// Ethereum chain should error because it is missing ChainConf
let metric = chain_metrics
.critical_error
.get_metric_with_label_values(&["ethereum"])
.unwrap();
assert_eq!(metric.get(), 1);

// Optimism chain should error because it is missing ChainConf
let metric = chain_metrics
.critical_error
.get_metric_with_label_values(&["optimism"])
.unwrap();
assert_eq!(metric.get(), 1);
}
}
2 changes: 1 addition & 1 deletion rust/main/agents/relayer/src/settings/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub struct RelayerSettings {
#[as_mut]
#[deref]
#[deref_mut]
base: Settings,
pub base: Settings,

/// Database path
pub db: PathBuf,
Expand Down
2 changes: 1 addition & 1 deletion rust/main/hyperlane-base/src/metrics/agent_metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ pub struct ChainMetrics {
pub gas_price: Option<GaugeVec>,

/// Boolean marker for critical errors on a chain, signalling loss of liveness.
critical_error: IntGaugeVec,
pub critical_error: IntGaugeVec,
}

impl ChainMetrics {
Expand Down
Loading

0 comments on commit c47511b

Please sign in to comment.