feat: add peer disconnect metrics (#1956)

This commit is contained in:
Dan Cline
2023-03-24 00:46:55 -04:00
committed by GitHub
parent a116d1cf97
commit 790541a710
3 changed files with 95 additions and 2 deletions

View File

@@ -23,7 +23,7 @@ use crate::{
import::{BlockImport, BlockImportOutcome, BlockValidation},
listener::ConnectionListener,
message::{NewBlockMessage, PeerMessage, PeerRequest, PeerRequestSender},
metrics::NetworkMetrics,
metrics::{DisconnectMetrics, NetworkMetrics},
network::{NetworkHandle, NetworkHandleMessage},
peers::{PeersHandle, PeersManager},
session::SessionManager,
@@ -53,7 +53,7 @@ use std::{
};
use tokio::sync::mpsc;
use tokio_stream::wrappers::UnboundedReceiverStream;
use tracing::{error, info, trace, warn};
use tracing::{debug, error, info, trace, warn};
/// Manages the _entire_ state of the network.
///
/// This is an endless [`Future`] that consistently drives the state of the entire network forward.
@@ -107,6 +107,8 @@ pub struct NetworkManager<C> {
num_active_peers: Arc<AtomicUsize>,
/// Metrics for the Network
metrics: NetworkMetrics,
/// Disconnect metrics for the Network
disconnect_metrics: DisconnectMetrics,
}
// === impl NetworkManager ===
@@ -231,6 +233,7 @@ where
to_eth_request_handler: None,
num_active_peers,
metrics: Default::default(),
disconnect_metrics: Default::default(),
})
}
@@ -647,6 +650,7 @@ where
?total_active,
"Session established"
);
debug!(target: "net", peer_enode=%NodeRecord::new(remote_addr, peer_id), "Established peer enode");
if direction.is_incoming() {
this.swarm
@@ -714,6 +718,9 @@ where
this.metrics
.outgoing_connections
.set(this.swarm.state().peers().num_outbound_connections() as f64);
if let Some(reason) = reason {
this.disconnect_metrics.increment(reason);
}
this.event_listeners
.send(NetworkEvent::SessionClosed { peer_id, reason });
}
@@ -731,6 +738,9 @@ where
.peers_mut()
.on_incoming_pending_session_dropped(remote_addr, err);
this.metrics.pending_session_failures.increment(1);
if let Some(reason) = err.as_disconnected() {
this.disconnect_metrics.increment(reason);
}
} else {
this.swarm
.state_mut()
@@ -762,6 +772,9 @@ where
err,
);
this.metrics.pending_session_failures.increment(1);
if let Some(reason) = err.as_disconnected() {
this.disconnect_metrics.increment(reason);
}
} else {
this.swarm
.state_mut()

View File

@@ -1,4 +1,5 @@
use metrics::{Counter, Gauge};
use reth_eth_wire::DisconnectReason;
use reth_metrics_derive::Metrics;
/// Metrics for the entire network, handled by NetworkManager
@@ -40,3 +41,72 @@ pub struct TransactionsManagerMetrics {
/// Total number of propagated transactions
pub(crate) propagated_transactions: Counter,
}
/// Metrics for Disconnection types
///
/// These are just counters, and ideally we would implement these metrics on a peer-by-peer basis,
/// in that we do not double-count peers for `TooManyPeers` if we make an outgoing connection and
/// get disconnected twice
#[derive(Metrics)]
#[metrics(scope = "network")]
pub struct DisconnectMetrics {
/// Number of peer disconnects due to DisconnectRequestd (0x00)
pub(crate) disconnect_requested: Counter,
/// Number of peer disconnects due to TcpSubsystemError (0x01)
pub(crate) tcp_subsystem_error: Counter,
/// Number of peer disconnects due to ProtocolBreach (0x02)
pub(crate) protocol_breach: Counter,
/// Number of peer disconnects due to UselessPeer (0x03)
pub(crate) useless_peer: Counter,
/// Number of peer disconnects due to TooManyPeers (0x04)
pub(crate) too_many_peers: Counter,
/// Number of peer disconnects due to AlreadyConnected (0x05)
pub(crate) already_connected: Counter,
/// Number of peer disconnects due to IncompatibleP2PProtocolVersion (0x06)
pub(crate) incompatible: Counter,
/// Number of peer disconnects due to NullNodeIdentity (0x07)
pub(crate) null_node_identity: Counter,
/// Number of peer disconnects due to ClientQuitting (0x08)
pub(crate) client_quitting: Counter,
/// Number of peer disconnects due to UnexpectedHandshakeIdentity (0x09)
pub(crate) unexpected_identity: Counter,
/// Number of peer disconnects due to ConnectedToSelf (0x0a)
pub(crate) connected_to_self: Counter,
/// Number of peer disconnects due to PingTimeout (0x0b)
pub(crate) ping_timeout: Counter,
/// Number of peer disconnects due to SubprotocolSpecific (0x10)
pub(crate) subprotocol_specific: Counter,
}
impl DisconnectMetrics {
/// Increments the proper counter for the given disconnect reason
pub(crate) fn increment(&self, reason: DisconnectReason) {
match reason {
DisconnectReason::DisconnectRequested => self.disconnect_requested.increment(1),
DisconnectReason::TcpSubsystemError => self.tcp_subsystem_error.increment(1),
DisconnectReason::ProtocolBreach => self.protocol_breach.increment(1),
DisconnectReason::UselessPeer => self.useless_peer.increment(1),
DisconnectReason::TooManyPeers => self.too_many_peers.increment(1),
DisconnectReason::AlreadyConnected => self.already_connected.increment(1),
DisconnectReason::IncompatibleP2PProtocolVersion => self.incompatible.increment(1),
DisconnectReason::NullNodeIdentity => self.null_node_identity.increment(1),
DisconnectReason::ClientQuitting => self.client_quitting.increment(1),
DisconnectReason::UnexpectedHandshakeIdentity => self.unexpected_identity.increment(1),
DisconnectReason::ConnectedToSelf => self.connected_to_self.increment(1),
DisconnectReason::PingTimeout => self.ping_timeout.increment(1),
DisconnectReason::SubprotocolSpecific => self.subprotocol_specific.increment(1),
}
}
}

View File

@@ -665,6 +665,16 @@ pub(crate) enum PendingSessionHandshakeError {
Ecies(ECIESError),
}
impl PendingSessionHandshakeError {
/// Returns the [`DisconnectReason`] if the error is a disconnect message
pub fn as_disconnected(&self) -> Option<DisconnectReason> {
match self {
PendingSessionHandshakeError::Eth(eth_err) => eth_err.as_disconnected(),
_ => None,
}
}
}
/// The direction of the connection.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Direction {