mirror of
https://github.com/paradigmxyz/reth.git
synced 2026-04-30 03:01:58 -04:00
feat(metrics): add MerkleChangeSets stage performance metrics
Add comprehensive metrics to track MerkleChangeSets stage execution:

- execution_duration: Time to execute the stage, per run
- blocks_per_execution: Number of blocks processed per run
- per_block_duration: Average processing time per block
- checkpoint_block: Current checkpoint block number
- checkpoint_lag: Distance between the checkpoint and the finalized tip
- execution_count: Total number of stage executions

Add Grafana dashboard panels:

- MerkleChangeSets - Execution Duration panel
- MerkleChangeSets - Blocks Per Execution panel
- New row for MerkleChangeSets Stage Performance metrics

These metrics will provide visibility into why the overlay state provider checkpoint lags 45-74 blocks behind, causing 600-700ms spikes when reverts are required. Expected baseline: ~64 blocks per execution (default retention).

Part of the implementation plan to reduce overlay provider revert spikes.
This commit is contained in:
@@ -17,6 +17,35 @@ use reth_trie_db::{DatabaseHashedPostState, DatabaseStateRoot};
|
||||
use std::ops::Range;
|
||||
use tracing::{debug, error};
|
||||
|
||||
#[cfg(feature = "metrics")]
|
||||
use reth_metrics::{
|
||||
metrics::{Counter, Gauge, Histogram},
|
||||
Metrics,
|
||||
};
|
||||
|
||||
/// Metrics recorded by the `MerkleChangeSets` stage on every call to `execute`.
///
/// All metrics are registered under the `stages.merkle_changesets` scope and are only compiled
/// in when the `metrics` feature is enabled.
#[cfg(feature = "metrics")]
#[derive(Clone, Metrics)]
#[metrics(scope = "stages.merkle_changesets")]
struct MerkleChangeSetsMetrics {
    /// Wall-clock duration of a single stage execution, in seconds
    execution_duration: Histogram,

    /// Number of blocks in the target range processed by a single execution
    blocks_per_execution: Histogram,

    /// Average time spent per block, in seconds (execution duration divided by the number of
    /// blocks processed). Not recorded for executions that processed zero blocks.
    per_block_duration: Histogram,

    /// Block number of the checkpoint produced by the most recent execution
    checkpoint_block: Gauge,

    /// Number of blocks the checkpoint trails the last finalized block
    /// (finalized block number - checkpoint, saturating at zero). Only updated when a finalized
    /// block number is available.
    checkpoint_lag: Gauge,

    /// Total number of stage executions started
    execution_count: Counter,
}
|
||||
|
||||
/// The `MerkleChangeSets` stage.
|
||||
///
|
||||
/// This stage processes and maintains trie changesets from the finalized block to the latest block.
|
||||
@@ -25,17 +54,35 @@ pub struct MerkleChangeSets {
|
||||
/// The number of blocks to retain changesets for, used as a fallback when the finalized block
|
||||
/// is not found. Defaults to 64 (2 epochs in beacon chain).
|
||||
retention_blocks: u64,
|
||||
#[cfg(feature = "metrics")]
|
||||
metrics: MerkleChangeSetsMetrics,
|
||||
}
|
||||
|
||||
impl MerkleChangeSets {
|
||||
/// Creates a new `MerkleChangeSets` stage with default retention blocks of 64.
|
||||
pub const fn new() -> Self {
|
||||
Self { retention_blocks: 64 }
|
||||
#[cfg(not(feature = "metrics"))]
|
||||
{
|
||||
Self { retention_blocks: 64 }
|
||||
}
|
||||
|
||||
#[cfg(feature = "metrics")]
|
||||
{
|
||||
Self { retention_blocks: 64, metrics: MerkleChangeSetsMetrics::default() }
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new `MerkleChangeSets` stage with a custom finalized block height.
|
||||
pub const fn with_retention_blocks(retention_blocks: u64) -> Self {
|
||||
Self { retention_blocks }
|
||||
#[cfg(not(feature = "metrics"))]
|
||||
{
|
||||
Self { retention_blocks }
|
||||
}
|
||||
|
||||
#[cfg(feature = "metrics")]
|
||||
{
|
||||
Self { retention_blocks, metrics: MerkleChangeSetsMetrics::default() }
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the range of blocks which are already computed. Will return an empty range if none
|
||||
@@ -297,6 +344,12 @@ where
|
||||
}
|
||||
|
||||
fn execute(&mut self, provider: &Provider, input: ExecInput) -> Result<ExecOutput, StageError> {
|
||||
#[cfg(feature = "metrics")]
|
||||
let execution_start = std::time::Instant::now();
|
||||
|
||||
#[cfg(feature = "metrics")]
|
||||
self.metrics.execution_count.increment(1);
|
||||
|
||||
// Get merkle checkpoint and assert that the target is the same.
|
||||
let merkle_checkpoint = provider
|
||||
.get_stage_checkpoint(StageId::MerkleExecute)?
|
||||
@@ -353,6 +406,11 @@ where
|
||||
computed_range = target_range.clone();
|
||||
}
|
||||
|
||||
let blocks_count = target_range.end.saturating_sub(target_range.start);
|
||||
|
||||
#[cfg(feature = "metrics")]
|
||||
self.metrics.blocks_per_execution.record(blocks_count as f64);
|
||||
|
||||
// Populate the target range with changesets
|
||||
Self::populate_range(provider, target_range)?;
|
||||
|
||||
@@ -370,6 +428,26 @@ where
|
||||
// `computed_range.end` is exclusive.
|
||||
let checkpoint = StageCheckpoint::new(computed_range.end.saturating_sub(1));
|
||||
|
||||
#[cfg(feature = "metrics")]
|
||||
{
|
||||
let execution_duration = execution_start.elapsed();
|
||||
self.metrics.execution_duration.record(execution_duration.as_secs_f64());
|
||||
|
||||
if blocks_count > 0 {
|
||||
let per_block = execution_duration.as_secs_f64() / blocks_count as f64;
|
||||
self.metrics.per_block_duration.record(per_block);
|
||||
}
|
||||
|
||||
// Record checkpoint state
|
||||
self.metrics.checkpoint_block.set(checkpoint.block_number as f64);
|
||||
|
||||
// Calculate lag
|
||||
if let Ok(Some(tip)) = provider.last_finalized_block_number() {
|
||||
let lag = tip.saturating_sub(checkpoint.block_number);
|
||||
self.metrics.checkpoint_lag.set(lag as f64);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ExecOutput::done(checkpoint))
|
||||
}
|
||||
|
||||
|
||||
213
dashboard.json
213
dashboard.json
@@ -6290,6 +6290,219 @@
|
||||
"title": "State root latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 177
|
||||
},
|
||||
"id": 400,
|
||||
"panels": [],
|
||||
"title": "MerkleChangeSets Stage Performance",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"description": "Time to execute MerkleChangeSets stage. High values indicate stage is slow.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 178
|
||||
},
|
||||
"id": 401,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "reth_stages_merkle_changesets_execution_duration{$instance_label=\"$instance\",quantile=~\"(0.5|0.9|0.95|1)\"}",
|
||||
"legendFormat": "{{quantile}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "MerkleChangeSets - Execution Duration",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"description": "Number of blocks processed per stage execution. High values mean infrequent updates causing checkpoint lag.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 50
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 100
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "blocks"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 178
|
||||
},
|
||||
"id": 402,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "reth_stages_merkle_changesets_blocks_per_execution{$instance_label=\"$instance\",quantile=~\"(0.5|0.9|0.95|1)\"}",
|
||||
"legendFormat": "{{quantile}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "MerkleChangeSets - Blocks Per Execution",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
|
||||
Reference in New Issue
Block a user