feat(net): add reason label to backed_off_peers metric (#22009)

Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Emma Jamieson-Hoare
2026-02-11 18:00:20 -05:00
committed by GitHub
parent 3a5c41e3da
commit a161ca294f
4 changed files with 65 additions and 8 deletions

View File

@@ -0,0 +1,5 @@
---
reth-network: minor
---
Added a reason label to the `backed_off_peers` metric. The metric now tracks backed-off peers by reason (`too_many_peers`, `graceful_close`, `connection_error`) to improve observability.

View File

@@ -25,11 +25,11 @@ use crate::{
listener::ConnectionListener,
message::{NewBlockMessage, PeerMessage},
metrics::{
ClosedSessionsMetrics, DisconnectMetrics, NetworkMetrics, PendingSessionFailureMetrics,
NETWORK_POOL_TRANSACTIONS_SCOPE,
BackedOffPeersMetrics, ClosedSessionsMetrics, DisconnectMetrics, NetworkMetrics,
PendingSessionFailureMetrics, NETWORK_POOL_TRANSACTIONS_SCOPE,
},
network::{NetworkHandle, NetworkHandleMessage},
peers::PeersManager,
peers::{BackoffReason, PeersManager},
poll_nested_stream_with_budget,
protocol::IntoRlpxSubProtocol,
required_block_filter::RequiredBlockFilter,
@@ -146,6 +146,8 @@ pub struct NetworkManager<N: NetworkPrimitives = EthNetworkPrimitives> {
closed_sessions_metrics: ClosedSessionsMetrics,
/// Pending session failure metrics, split by direction.
pending_session_failure_metrics: PendingSessionFailureMetrics,
/// Backed off peers metrics, split by reason.
backed_off_peers_metrics: BackedOffPeersMetrics,
}
impl NetworkManager {
@@ -363,6 +365,7 @@ impl<N: NetworkPrimitives> NetworkManager<N> {
disconnect_metrics: Default::default(),
closed_sessions_metrics: Default::default(),
pending_session_failure_metrics: Default::default(),
backed_off_peers_metrics: Default::default(),
})
}
@@ -869,10 +872,15 @@ impl<N: NetworkPrimitives> NetworkManager<N> {
&peer_id,
err,
);
self.backed_off_peers_metrics.increment_for_reason(
BackoffReason::from_disconnect(err.as_disconnected()),
);
err.as_disconnected()
} else {
// Gracefully disconnected
self.swarm.state_mut().peers_mut().on_active_session_gracefully_closed(peer_id);
self.backed_off_peers_metrics
.increment_for_reason(BackoffReason::GracefulClose);
None
};
self.closed_sessions_metrics.active.increment(1);
@@ -914,9 +922,6 @@ impl<N: NetworkPrimitives> NetworkManager<N> {
self.metrics
.incoming_connections
.set(self.swarm.state().peers().num_inbound_connections() as f64);
self.metrics
.backed_off_peers
.set(self.swarm.state().peers().num_backed_off_peers() as f64);
}
SwarmEvent::OutgoingPendingSessionClosed { remote_addr, peer_id, error } => {
trace!(
@@ -934,6 +939,9 @@ impl<N: NetworkPrimitives> NetworkManager<N> {
err,
);
self.pending_session_failure_metrics.outbound.increment(1);
self.backed_off_peers_metrics.increment_for_reason(
BackoffReason::from_disconnect(err.as_disconnected()),
);
if let Some(reason) = err.as_disconnected() {
self.disconnect_metrics.increment(reason);
}
@@ -945,7 +953,6 @@ impl<N: NetworkPrimitives> NetworkManager<N> {
}
self.closed_sessions_metrics.outgoing_pending.increment(1);
self.update_pending_connection_metrics();
self.metrics
.backed_off_peers
.set(self.swarm.state().peers().num_backed_off_peers() as f64);
@@ -965,6 +972,7 @@ impl<N: NetworkPrimitives> NetworkManager<N> {
&error,
);
self.backed_off_peers_metrics.increment_for_reason(BackoffReason::ConnectionError);
self.metrics
.backed_off_peers
.set(self.swarm.state().peers().num_backed_off_peers() as f64);

View File

@@ -2,7 +2,7 @@ use metrics::Histogram;
use reth_eth_wire::DisconnectReason;
use reth_ethereum_primitives::TxType;
use reth_metrics::{
metrics::{self, Counter, Gauge},
metrics::{Counter, Gauge},
Metrics,
};
@@ -110,6 +110,29 @@ impl Default for PendingSessionFailureMetrics {
}
}
/// Metrics for backed off peers, split by reason.
///
/// Holds one counter per `BackoffReason` variant; `increment_for_reason` bumps the matching
/// counter by one each time it is called.
// NOTE(review): the field doc comments below are presumably consumed by `#[derive(Metrics)]` as
// the metric descriptions, and the exported metric names presumably combine the scope with the
// field name -- confirm against the derive macro before rewording or renaming anything here.
#[derive(Metrics)]
#[metrics(scope = "network.backed_off_peers")]
pub struct BackedOffPeersMetrics {
/// Peers backed off because they reported too many peers.
pub too_many_peers: Counter,
/// Peers backed off after a graceful session close.
pub graceful_close: Counter,
/// Peers backed off due to connection or protocol errors.
pub connection_error: Counter,
}
impl BackedOffPeersMetrics {
/// Increments the counter for the given backoff reason.
pub fn increment_for_reason(&self, reason: crate::peers::BackoffReason) {
match reason {
crate::peers::BackoffReason::TooManyPeers => self.too_many_peers.increment(1),
crate::peers::BackoffReason::GracefulClose => self.graceful_close.increment(1),
crate::peers::BackoffReason::ConnectionError => self.connection_error.increment(1),
}
}
}
/// Metrics for `SessionManager`
#[derive(Metrics)]
#[metrics(scope = "network")]

View File

@@ -1260,6 +1260,27 @@ impl Display for InboundConnectionError {
}
}
/// The reason a peer was backed off.
///
/// Each variant selects one counter in `BackedOffPeersMetrics::increment_for_reason`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackoffReason {
/// The remote peer responded with `TooManyPeers` (0x04).
///
/// This is the only variant [`BackoffReason::from_disconnect`] derives from a specific
/// disconnect reason.
TooManyPeers,
/// The session was gracefully closed and we're backing off briefly.
///
/// Never produced by [`BackoffReason::from_disconnect`]; callers pass it explicitly.
GracefulClose,
/// A connection or protocol-level error occurred.
///
/// Also the fallback of [`BackoffReason::from_disconnect`] for every input other than
/// `Some(DisconnectReason::TooManyPeers)`, including `None`.
ConnectionError,
}
impl BackoffReason {
    /// Maps an optional [`DisconnectReason`] to the backoff reason it is accounted under.
    ///
    /// Returns [`Self::TooManyPeers`] only for `Some(DisconnectReason::TooManyPeers)`. Every
    /// other input, including `None` and any other disconnect reason, yields
    /// [`Self::ConnectionError`]. [`Self::GracefulClose`] is never returned from here.
    pub const fn from_disconnect(reason: Option<DisconnectReason>) -> Self {
        if matches!(reason, Some(DisconnectReason::TooManyPeers)) {
            Self::TooManyPeers
        } else {
            Self::ConnectionError
        }
    }
}
#[cfg(test)]
mod tests {
use alloy_primitives::B512;