From 790541a710de1becd6e6c38cfeccdd7396330325 Mon Sep 17 00:00:00 2001 From: Dan Cline <6798349+Rjected@users.noreply.github.com> Date: Fri, 24 Mar 2023 00:46:55 -0400 Subject: [PATCH] feat: add peer disconnect metrics (#1956) --- crates/net/network/src/manager.rs | 17 ++++++- crates/net/network/src/metrics.rs | 70 +++++++++++++++++++++++++++ crates/net/network/src/session/mod.rs | 10 ++++ 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/crates/net/network/src/manager.rs b/crates/net/network/src/manager.rs index 502cd1f32c..22af3b43b2 100644 --- a/crates/net/network/src/manager.rs +++ b/crates/net/network/src/manager.rs @@ -23,7 +23,7 @@ use crate::{ import::{BlockImport, BlockImportOutcome, BlockValidation}, listener::ConnectionListener, message::{NewBlockMessage, PeerMessage, PeerRequest, PeerRequestSender}, - metrics::NetworkMetrics, + metrics::{DisconnectMetrics, NetworkMetrics}, network::{NetworkHandle, NetworkHandleMessage}, peers::{PeersHandle, PeersManager}, session::SessionManager, @@ -53,7 +53,7 @@ use std::{ }; use tokio::sync::mpsc; use tokio_stream::wrappers::UnboundedReceiverStream; -use tracing::{error, info, trace, warn}; +use tracing::{debug, error, info, trace, warn}; /// Manages the _entire_ state of the network. /// /// This is an endless [`Future`] that consistently drives the state of the entire network forward. @@ -107,6 +107,8 @@ pub struct NetworkManager { num_active_peers: Arc, /// Metrics for the Network metrics: NetworkMetrics, + /// Disconnect metrics for the Network + disconnect_metrics: DisconnectMetrics, } // === impl NetworkManager === @@ -231,6 +233,7 @@ where to_eth_request_handler: None, num_active_peers, metrics: Default::default(), + disconnect_metrics: Default::default(), }) } @@ -647,6 +650,7 @@ where ?total_active, "Session established" ); + debug!(target: "net", peer_enode=%NodeRecord::new(remote_addr, peer_id), "Established peer enode"); if direction.is_incoming() { this.swarm @@ -714,6 +718,9 @@ where this.metrics .outgoing_connections .set(this.swarm.state().peers().num_outbound_connections() as f64); + if let Some(reason) = reason { + this.disconnect_metrics.increment(reason); + } this.event_listeners .send(NetworkEvent::SessionClosed { peer_id, reason }); } @@ -731,6 +738,9 @@ where .peers_mut() .on_incoming_pending_session_dropped(remote_addr, err); this.metrics.pending_session_failures.increment(1); + if let Some(reason) = err.as_disconnected() { + this.disconnect_metrics.increment(reason); + } } else { this.swarm .state_mut() @@ -762,6 +772,9 @@ where err, ); this.metrics.pending_session_failures.increment(1); + if let Some(reason) = err.as_disconnected() { + this.disconnect_metrics.increment(reason); + } } else { this.swarm .state_mut() diff --git a/crates/net/network/src/metrics.rs b/crates/net/network/src/metrics.rs index 62e02ab757..523ef6254b 100644 --- a/crates/net/network/src/metrics.rs +++ b/crates/net/network/src/metrics.rs @@ -1,4 +1,5 @@ use metrics::{Counter, Gauge}; +use reth_eth_wire::DisconnectReason; use reth_metrics_derive::Metrics; /// Metrics for the entire network, handled by NetworkManager @@ -40,3 +41,72 @@ pub struct TransactionsManagerMetrics { /// Total number of propagated transactions pub(crate) propagated_transactions: Counter, } + +/// Metrics for Disconnection types +/// +/// These are just counters, and ideally we would implement these metrics on a peer-by-peer basis, +/// in that we do not double-count peers for `TooManyPeers` if we make an outgoing connection and +/// get disconnected twice +#[derive(Metrics)] +#[metrics(scope = "network")] +pub struct DisconnectMetrics { + /// Number of peer disconnects due to DisconnectRequestd (0x00) + pub(crate) disconnect_requested: Counter, + + /// Number of peer disconnects due to TcpSubsystemError (0x01) + pub(crate) tcp_subsystem_error: Counter, + + /// Number of peer disconnects due to ProtocolBreach (0x02) + pub(crate) protocol_breach: Counter, + + /// Number of peer disconnects due to UselessPeer (0x03) + pub(crate) useless_peer: Counter, + + /// Number of peer disconnects due to TooManyPeers (0x04) + pub(crate) too_many_peers: Counter, + + /// Number of peer disconnects due to AlreadyConnected (0x05) + pub(crate) already_connected: Counter, + + /// Number of peer disconnects due to IncompatibleP2PProtocolVersion (0x06) + pub(crate) incompatible: Counter, + + /// Number of peer disconnects due to NullNodeIdentity (0x07) + pub(crate) null_node_identity: Counter, + + /// Number of peer disconnects due to ClientQuitting (0x08) + pub(crate) client_quitting: Counter, + + /// Number of peer disconnects due to UnexpectedHandshakeIdentity (0x09) + pub(crate) unexpected_identity: Counter, + + /// Number of peer disconnects due to ConnectedToSelf (0x0a) + pub(crate) connected_to_self: Counter, + + /// Number of peer disconnects due to PingTimeout (0x0b) + pub(crate) ping_timeout: Counter, + + /// Number of peer disconnects due to SubprotocolSpecific (0x10) + pub(crate) subprotocol_specific: Counter, +} + +impl DisconnectMetrics { + /// Increments the proper counter for the given disconnect reason + pub(crate) fn increment(&self, reason: DisconnectReason) { + match reason { + DisconnectReason::DisconnectRequested => self.disconnect_requested.increment(1), + DisconnectReason::TcpSubsystemError => self.tcp_subsystem_error.increment(1), + DisconnectReason::ProtocolBreach => self.protocol_breach.increment(1), + DisconnectReason::UselessPeer => self.useless_peer.increment(1), + DisconnectReason::TooManyPeers => self.too_many_peers.increment(1), + DisconnectReason::AlreadyConnected => self.already_connected.increment(1), + DisconnectReason::IncompatibleP2PProtocolVersion => self.incompatible.increment(1), + DisconnectReason::NullNodeIdentity => self.null_node_identity.increment(1), + DisconnectReason::ClientQuitting => self.client_quitting.increment(1), + DisconnectReason::UnexpectedHandshakeIdentity => self.unexpected_identity.increment(1), + DisconnectReason::ConnectedToSelf => self.connected_to_self.increment(1), + DisconnectReason::PingTimeout => self.ping_timeout.increment(1), + DisconnectReason::SubprotocolSpecific => self.subprotocol_specific.increment(1), + } + } +} diff --git a/crates/net/network/src/session/mod.rs b/crates/net/network/src/session/mod.rs index 03d67f915f..0582686b81 100644 --- a/crates/net/network/src/session/mod.rs +++ b/crates/net/network/src/session/mod.rs @@ -665,6 +665,16 @@ pub(crate) enum PendingSessionHandshakeError { Ecies(ECIESError), } +impl PendingSessionHandshakeError { + /// Returns the [`DisconnectReason`] if the error is a disconnect message + pub fn as_disconnected(&self) -> Option { + match self { + PendingSessionHandshakeError::Eth(eth_err) => eth_err.as_disconnected(), + _ => None, + } + } +} + /// The direction of the connection. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Direction {