feat(metrics): track revert collection counts in MerkleChangeSets stage

Add metrics to measure the workload size of revert collection:

Metrics Added:
- state_reverts_collected: Total state items (accounts + storage slots)
- trie_reverts_collected: Total trie items (account + storage nodes)
- state_reverts_per_block: Average state reverts per block
- trie_reverts_per_block: Average trie reverts per block

Implementation:
- Created RevertCounts struct to return counts from populate_range()
- Track state revert counts during HashedPostState::from_reverts() loop
- Track trie revert counts using TrieUpdatesSorted::total_len()
- Record all metrics in execute() method

Dashboard:
- Added 'State Reverts Collected' panel (id: 403)
- Added 'Trie Reverts Collected' panel (id: 404)
- Both panels at y:186, side-by-side below existing MerkleChangeSets metrics

These metrics will help understand:
1. Correlation between revert count and execution time
2. Whether workload scales linearly with block count
3. If optimization is needed for high revert scenarios

Part of overlay provider performance analysis to reduce 600-700ms spikes.
2026-04-30 03:01:58 -04:00 · 2025-10-31 17:34:43 +08:00
parent f3d0858f18
commit 364910acb4
4 changed files with 1085 additions and 5 deletions
--- a/IMPLEMENTATION_PLAN.md
+++ b/IMPLEMENTATION_PLAN.md
@@ -0,0 +1,568 @@
+# Implementation Plan: Reduce Overlay State Provider Spikes
+
+## 🎯 Goal
+Reduce 600-700ms overlay provider spikes to <200ms by addressing MerkleChangeSets checkpoint lag.
+
+## 📋 Step-by-Step Implementation
+
+### Step 1: Add MerkleChangeSets Metrics (Priority: CRITICAL)
+
+This gives us visibility into WHY the checkpoint lags.
+
+#### **File: `crates/stages/stages/src/stages/merkle_changesets.rs`**
+
+**A. Add metrics struct at top of file:**
+
+```rust
+#[cfg(feature = "metrics")]
+use reth_metrics::{
+    metrics::{Counter, Gauge, Histogram},
+    Metrics,
+};
+
+#[cfg(feature = "metrics")]
+#[derive(Clone, Metrics)]
+#[metrics(scope = "stages.merkle_changesets")]
+struct MerkleChangeSetsMetrics {
+    /// Total execution duration per stage run
+    execution_duration: Histogram,
+
+    /// Number of blocks processed per execution
+    blocks_per_execution: Histogram,
+
+    /// Time to process one block (average)
+    per_block_duration: Histogram,
+
+    /// Current checkpoint block number
+    checkpoint_block: Gauge,
+
+    /// Checkpoint lag in blocks (tip - checkpoint)
+    checkpoint_lag: Gauge,
+
+    /// Number of stage executions
+    execution_count: Counter,
+}
+```
+
+**B. Add metrics field to MerkleChangeSets struct:**
+
+```rust
+#[derive(Debug, Clone)]
+pub struct MerkleChangeSets {
+    retention_blocks: u64,
+    #[cfg(feature = "metrics")]
+    metrics: MerkleChangeSetsMetrics,
+}
+```
+
+**C. Update constructors:**
+
+```rust
+pub const fn new() -> Self {
+    #[cfg(not(feature = "metrics"))]
+    {
+        Self { retention_blocks: 64 }
+    }
+
+    #[cfg(feature = "metrics")]
+    {
+        Self {
+            retention_blocks: 64,
+            metrics: MerkleChangeSetsMetrics::default(),
+        }
+    }
+}
+
+pub const fn with_retention_blocks(retention_blocks: u64) -> Self {
+    #[cfg(not(feature = "metrics"))]
+    {
+        Self { retention_blocks }
+    }
+
+    #[cfg(feature = "metrics")]
+    {
+        Self {
+            retention_blocks,
+            metrics: MerkleChangeSetsMetrics::default(),
+        }
+    }
+}
+```
+
+**D. Instrument execute() method:**
+
+Find the `fn execute()` method around line 299 and add:
+
+```rust
+fn execute(&mut self, provider: &Provider, input: ExecInput) -> Result<ExecOutput, StageError> {
+    #[cfg(feature = "metrics")]
+    let execution_start = std::time::Instant::now();
+
+    #[cfg(feature = "metrics")]
+    self.metrics.execution_count.increment(1);
+
+    // ... existing code ...
+
+    let target_range = self.determine_target_range(provider)?;
+    let blocks_count = target_range.end.saturating_sub(target_range.start);
+
+    #[cfg(feature = "metrics")]
+    self.metrics.blocks_per_execution.record(blocks_count as f64);
+
+    // ... existing population code ...
+
+    // At the end, before returning:
+    #[cfg(feature = "metrics")]
+    {
+        let execution_duration = execution_start.elapsed();
+        self.metrics.execution_duration.record(execution_duration.as_secs_f64());
+
+        if blocks_count > 0 {
+            let per_block = execution_duration.as_secs_f64() / blocks_count as f64;
+            self.metrics.per_block_duration.record(per_block);
+        }
+
+        // Record checkpoint state
+        if let Some(checkpoint) = input.checkpoint {
+            self.metrics.checkpoint_block.set(checkpoint.block_number as f64);
+
+            // Calculate lag
+            if let Ok(Some(tip)) = provider.last_finalized_block_number() {
+                let lag = tip.saturating_sub(checkpoint.block_number);
+                self.metrics.checkpoint_lag.set(lag as f64);
+            }
+        }
+    }
+
+    Ok(output)
+}
+```
+
+---
+
+### Step 2: Add Grafana Panels for New Metrics
+
+#### **File: `dashboard.json`**
+
+Add these panels after the existing "Overlay State Provider - Checkpoint Delta" panel:
+
+```json
+{
+  "collapsed": false,
+  "gridPos": {
+    "h": 1,
+    "w": 24,
+    "x": 0,
+    "y": 177
+  },
+  "id": 400,
+  "panels": [],
+  "title": "MerkleChangeSets Stage Performance",
+  "type": "row"
+},
+{
+  "datasource": {
+    "type": "prometheus",
+    "uid": "${datasource}"
+  },
+  "description": "Time to execute MerkleChangeSets stage. High values indicate stage is slow.",
+  "fieldConfig": {
+    "defaults": {
+      "color": {
+        "mode": "palette-classic"
+      },
+      "custom": {
+        "axisBorderShow": false,
+        "axisCenteredZero": false,
+        "axisColorMode": "text",
+        "axisLabel": "",
+        "axisPlacement": "auto",
+        "barAlignment": 0,
+        "drawStyle": "line",
+        "fillOpacity": 0,
+        "gradientMode": "none",
+        "hideFrom": {
+          "legend": false,
+          "tooltip": false,
+          "viz": false
+        },
+        "lineInterpolation": "linear",
+        "lineWidth": 1,
+        "pointSize": 5,
+        "scaleDistribution": {
+          "type": "linear"
+        },
+        "showPoints": "auto",
+        "spanNulls": false,
+        "stacking": {
+          "group": "A",
+          "mode": "none"
+        },
+        "thresholdsStyle": {
+          "mode": "off"
+        }
+      },
+      "mappings": [],
+      "thresholds": {
+        "mode": "absolute",
+        "steps": [
+          {
+            "color": "green",
+            "value": 0
+          },
+          {
+            "color": "yellow",
+            "value": 1
+          },
+          {
+            "color": "red",
+            "value": 5
+          }
+        ]
+      },
+      "unit": "s"
+    },
+    "overrides": []
+  },
+  "gridPos": {
+    "h": 8,
+    "w": 12,
+    "x": 0,
+    "y": 178
+  },
+  "id": 401,
+  "options": {
+    "legend": {
+      "calcs": [
+        "mean",
+        "max"
+      ],
+      "displayMode": "table",
+      "placement": "bottom",
+      "showLegend": true
+    },
+    "tooltip": {
+      "mode": "multi",
+      "sort": "none"
+    }
+  },
+  "targets": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "expr": "reth_stages_merkle_changesets_execution_duration{$instance_label=\"$instance\",quantile=~\"(0.5|0.9|0.95|1)\"}",
+      "legendFormat": "{{quantile}}",
+      "refId": "A"
+    }
+  ],
+  "title": "MerkleChangeSets - Execution Duration",
+  "type": "timeseries"
+},
+{
+  "datasource": {
+    "type": "prometheus",
+    "uid": "${datasource}"
+  },
+  "description": "Number of blocks processed per stage execution. High values mean infrequent updates causing checkpoint lag.",
+  "fieldConfig": {
+    "defaults": {
+      "color": {
+        "mode": "palette-classic"
+      },
+      "custom": {
+        "axisBorderShow": false,
+        "axisCenteredZero": false,
+        "axisColorMode": "text",
+        "axisLabel": "",
+        "axisPlacement": "auto",
+        "barAlignment": 0,
+        "drawStyle": "line",
+        "fillOpacity": 0,
+        "gradientMode": "none",
+        "hideFrom": {
+          "legend": false,
+          "tooltip": false,
+          "viz": false
+        },
+        "lineInterpolation": "linear",
+        "lineWidth": 1,
+        "pointSize": 5,
+        "scaleDistribution": {
+          "type": "linear"
+        },
+        "showPoints": "auto",
+        "spanNulls": false,
+        "stacking": {
+          "group": "A",
+          "mode": "none"
+        },
+        "thresholdsStyle": {
+          "mode": "off"
+        }
+      },
+      "mappings": [],
+      "thresholds": {
+        "mode": "absolute",
+        "steps": [
+          {
+            "color": "green",
+            "value": 0
+          },
+          {
+            "color": "yellow",
+            "value": 50
+          },
+          {
+            "color": "red",
+            "value": 100
+          }
+        ]
+      },
+      "unit": "blocks"
+    },
+    "overrides": []
+  },
+  "gridPos": {
+    "h": 8,
+    "w": 12,
+    "x": 12,
+    "y": 178
+  },
+  "id": 402,
+  "options": {
+    "legend": {
+      "calcs": [
+        "mean",
+        "max"
+      ],
+      "displayMode": "table",
+      "placement": "bottom",
+      "showLegend": true
+    },
+    "tooltip": {
+      "mode": "multi",
+      "sort": "none"
+    }
+  },
+  "targets": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "expr": "reth_stages_merkle_changesets_blocks_per_execution{$instance_label=\"$instance\",quantile=~\"(0.5|0.9|0.95|1)\"}",
+      "legendFormat": "{{quantile}}",
+      "refId": "A"
+    }
+  ],
+  "title": "MerkleChangeSets - Blocks Per Execution",
+  "type": "timeseries"
+}
+```
+
+---
+
+### Step 3: Test and Baseline
+
+**Commands to run:**
+
+```bash
+# 1. Format code
+cargo +nightly fmt --all
+
+# 2. Build with metrics
+cargo build --release --features metrics
+
+# 3. Run and collect metrics for 10 minutes
+# Query Prometheus:
+rate(reth_stages_merkle_changesets_execution_count[5m])
+reth_stages_merkle_changesets_blocks_per_execution{quantile="0.5"}
+reth_stages_merkle_changesets_checkpoint_lag
+```
+
+**Baseline expectations:**
+- Execution frequency: Should see ~1-2 per minute
+- Blocks per execution: Probably 50-70 (explains 45-74 block lag!)
+- Checkpoint lag: Should match earlier observations (45-74 blocks)
+
+---
+
+### Step 4: Implement Fix (Reduce Batch Size)
+
+Once you confirm batch size is too large:
+
+#### **Find where MerkleChangeSets is constructed**
+
+```bash
+# Search for where stage is created
+rg "MerkleChangeSets::new|MerkleChangeSets::with_retention_blocks" --type rust
+```
+
+Likely in `crates/node/builder/src/launch/` or similar pipeline setup.
+
+#### **Change from:**
+
+```rust
+MerkleChangeSets::new()  // Default 64 blocks
+```
+
+#### **To:**
+
+```rust
+MerkleChangeSets::with_retention_blocks(20)  // Update every 20 blocks
+```
+
+**Or** add time-based triggering (more complex):
+
+```rust
+// In stage execution loop
+if last_merkle_update.elapsed() > Duration::from_secs(10) {
+    stage.execute(provider, input)?;
+    last_merkle_update = Instant::now();
+}
+```
+
+---
+
+### Step 5: Validate Fix
+
+**After deploying fix:**
+
+```bash
+# Check new metrics
+reth_stages_merkle_changesets_blocks_per_execution{quantile="0.5"}
+# Should be ~20 now (was 50-70)
+
+reth_stages_merkle_changesets_checkpoint_lag
+# Should be 0-20 blocks (was 45-74)
+
+# Check overlay metrics
+rate(reth_storage_overlay_state_provider_reverts_required[5m])
+# Should stay ~1.7 req/s but...
+
+# Check overlay duration
+reth_storage_overlay_state_provider_trie_reverts_duration{quantile="0.9"}
+# Should drop to ~100-200ms (was 500-600ms)
+
+reth_storage_overlay_state_provider_total_database_provider_ro_duration{quantile="0.9"}
+# Should drop to ~200-300ms (was 600-700ms)
+```
+
+**Success criteria:**
+- ✅ Checkpoint lag: <25 blocks (was 45-74)
+- ✅ Trie revert duration: <250ms (was 500-600ms)
+- ✅ Total overlay duration: <350ms (was 600-700ms)
+- ✅ No regression in overall throughput
+
+---
+
+### Step 6: Add Alerting
+
+#### **File: `alerting_rules.yml` (or similar)**
+
+```yaml
+groups:
+  - name: merkle_changesets
+    interval: 30s
+    rules:
+      - alert: MerkleChangeSetsCheckpointLagging
+        expr: |
+          reth_stages_merkle_changesets_checkpoint_lag > 50
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "MerkleChangeSets checkpoint is {{ $value }} blocks behind"
+          description: "Checkpoint lag >50 blocks. Target: <25 blocks. Check stage execution frequency."
+
+      - alert: MerkleChangeSetsExecutionSlow
+        expr: |
+          reth_stages_merkle_changesets_execution_duration{quantile="0.9"} > 5
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "MerkleChangeSets taking {{ $value }}s to execute (p90)"
+          description: "Stage execution >5s. Check DB performance or reduce batch size."
+
+      - alert: OverlayRevertsSpiking
+        expr: |
+          reth_storage_overlay_state_provider_trie_reverts_duration{quantile="0.9"} > 0.5
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Overlay trie reverts taking {{ $value }}s (p90)"
+          description: "Revert duration >500ms. Check MerkleChangeSets checkpoint lag."
+```
+
+---
+
+## 🧪 Testing Checklist
+
+### Before Fix
+- [ ] Baseline checkpoint lag metric
+- [ ] Baseline blocks per execution
+- [ ] Baseline overlay revert duration
+- [ ] Baseline Engine API throughput
+
+### After Fix
+- [ ] Checkpoint lag reduced to <25 blocks
+- [ ] Overlay revert duration reduced to <250ms
+- [ ] No throughput regression
+- [ ] Alerts configured and tested
+
+### Production Validation
+- [ ] Deploy to staging first
+- [ ] Run for 24 hours
+- [ ] Compare metrics before/after
+- [ ] Validate under peak load
+- [ ] Deploy to production
+
+---
+
+## 📊 Expected Results
+
+| Metric | Before | After | Improvement |
+|--------|--------|-------|-------------|
+| Checkpoint Lag | 45-74 blocks | 10-20 blocks | 60-70% ↓ |
+| Trie Reverts | 500-600ms | 100-200ms | 65-80% ↓ |
+| Total Overlay | 600-700ms | 200-300ms | 60-70% ↓ |
+| Impact Rate | 1.72 req/s | 1.72 req/s | Same |
+| State Root | 5-10μs | 5-10μs | Unchanged |
+
+---
+
+## 🚨 Rollback Plan
+
+If fix causes issues:
+
+1. **Revert retention_blocks change**:
+   ```rust
+   MerkleChangeSets::new()  // Back to default 64
+   ```
+
+2. **Monitor for recovery**:
+   ```bash
+   # Checkpoint lag should return to baseline
+   reth_stages_merkle_changesets_checkpoint_lag
+   ```
+
+3. **Alternative approach**: Implement async revert fetching instead
+
+---
+
+## 📝 Next Steps
+
+1. ✅ Add MerkleChangeSets metrics (Step 1)
+2. ✅ Add Grafana panels (Step 2)
+3. ✅ Collect baseline data (Step 3)
+4. 🎯 Implement fix based on baseline (Step 4)
+5. 📊 Validate results (Step 5)
+6. 🚨 Configure alerts (Step 6)
+
+**Estimated timeline**: 1-2 weeks
+
+**Next action**: Step 4 — implement the fix once the baseline data from Step 3 confirms that the batch size is the cause.
--- a/OVERLAY_ANALYSIS.md
+++ b/OVERLAY_ANALYSIS.md
@@ -0,0 +1,286 @@
+# Overlay State Provider Performance Analysis
+
+## 🚨 Critical Finding
+
+**600-700ms spikes in Overlay State Provider are NOT reth-bench specific** - they occur in production during Engine API block validation!
+
+## 📊 Root Cause
+
+### Where block_hash is Set (Production Code)
+
+Found in `crates/engine/tree/src/tree/payload_validator.rs:655` and `:785`:
+
+```rust
+let factory = OverlayStateProviderFactory::new(self.provider.clone())
+    .with_block_hash(Some(block_hash))  // ← Sets block_hash to parent block
+    .with_trie_overlay(Some(multiproof_config.nodes_sorted))
+    .with_hashed_state_overlay(Some(multiproof_config.state_sorted));
+```
+
+**Context**: This is in the **payload validator** that validates blocks received from the consensus layer via Engine API.
+
+**Why block_hash is set**: The validator needs state at the parent block to validate the new block's state transitions.
+
+### Why Checkpoint Lags
+
+**MerkleChangeSets stage** (`crates/stages/stages/src/stages/merkle_changesets.rs`):
+- Runs as part of the **staged sync pipeline**
+- Only updates when **MerkleExecute stage** completes
+- Processes blocks in **batches** based on finalized blocks or retention window (default 64 blocks)
+- Not real-time - runs periodically as part of sync
+
+**The Problem**:
+1. New blocks are processed by Engine API → Execution stage
+2. MerkleChangeSets stage lags behind MerkleExecute
+3. Checkpoint can be 45-74 blocks behind current tip
+4. Overlay provider needs reverts when `requested_block > checkpoint`
+5. **600ms spent fetching reverts** for 45-74 blocks of history
+
+## 🎯 Impact Analysis
+
+### Metrics Summary
+- **Frequency**: 1.72 req/s (0.4% of overlay provider calls)
+- **Cost per call**: 600-700ms
+  - Trie Reverts: 500-600ms
+  - State Reverts: 60-120ms
+- **Total overhead**: ~1 second of DB queries per second
+- **Throughput correlation**: One observed 57% drop (14:40), but not consistent
+
+### What's NOT Affected
+✅ **State root computation**: Stays 5-10μs (completely unaffected)
+✅ **~99.6% of overlay calls**: Use fast path (no reverts; only ~0.4% of calls need them)
+✅ **Block processing**: No direct correlation with spikes
+
+### What IS Affected
+⚠️ **Engine API validation**: When checkpoint lags, validation requires expensive reverts
+⚠️ **Database load**: 500-600ms of read queries per affected call
+⚠️ **Tail latency**: p99 latency can rise (and throughput can dip) during revert fetches
+
+## 🔍 Why This Happens
+
+```
+Timeline of Events:
+
+14:40:00 ──────────────────────────────────────────────────────► 14:46:00
+    │                                                               │
+    ├─ Engine API receives new block (parent = block N)           │
+    │  - Sets block_hash = N (parent block)                        │
+    │  - Checkpoint at block N-60 (lagging!)                       │
+    │  - Needs reverts: N-60 → N (60 blocks!)                      │
+    │  - Trie fetch: 500ms for 60 blocks                           │
+    │  - State fetch: 60ms for 60 blocks                           │
+    │  - Total: 600ms spike                                        │
+    │                                                               │
+    ├─ MerkleChangeSets runs (periodic)                            │
+    │  - Updates checkpoint to block N                             │
+    │  - Next validation: no reverts needed!                       │
+    │                                                               │
+    └─ Process repeats every ~70 blocks                            │
+```
+
+## 🛠️ Solution Options
+
+### Option 1: Run MerkleChangeSets More Frequently ⭐ RECOMMENDED
+
+**Change**: Reduce batch size from current (probably 64+ blocks) to smaller batches (10-20 blocks)
+
+**How**:
+1. Find MerkleChangeSets configuration in pipeline setup
+2. Reduce `retention_blocks` or add time-based trigger
+3. Make stage run every 10-20 blocks instead of 64+
+
+**Pros**:
+- ✅ Reduces checkpoint lag to 10-20 blocks
+- ✅ Reduces revert fetch time to ~100-200ms (vs 600ms)
+- ✅ More consistent performance
+
+**Cons**:
+- ⚠️ More frequent checkpoint writes (increased DB I/O)
+- ⚠️ Need to measure impact on overall throughput
+
+**Implementation**:
+```rust
+// In stage pipeline configuration
+MerkleChangeSets::with_retention_blocks(20)  // Was: 64
+```
+
+### Option 2: Optimize Revert Fetching
+
+**Change**: Cache recent reverts or make fetching async
+
+**A. LRU Cache**:
+```rust
+struct OverlayStateProviderFactory<F> {
+    factory: F,
+    revert_cache: Arc<Mutex<LruCache<(BlockNumber, BlockNumber), CachedReverts>>>,
+}
+```
+
+**B. Async Fetching**:
+```rust
+// Don't block overlay creation - fetch reverts in background
+let revert_future = tokio::spawn(async move {
+    provider.trie_reverts(from_block + 1)
+});
+```
+
+**Pros**:
+- ✅ Reduces blocking time
+- ✅ Can help with repeated queries
+
+**Cons**:
+- ⚠️ Adds complexity
+- ⚠️ Cache may not help much (queries are for different ranges)
+- ⚠️ Async doesn't reduce actual DB query time
+
+### Option 3: Accept Current Behavior ⚠️
+
+**If**:
+- 0.4% of calls taking 600ms is acceptable
+- Throughput impact is minimal
+- State root (critical path) is unaffected
+
+**Then**: Document as expected behavior, add monitoring/alerting
+
+### Option 4: Optimize MerkleChangeSets Stage Itself
+
+**Change**: Make the stage itself faster so it can keep up
+
+**How**:
+- Profile `HashedPostState::from_reverts()` (line 195-198)
+- Optimize trie update calculations (line 234-253)
+- Parallelize block processing if possible
+
+**Pros**:
+- ✅ Benefits all operations, not just overlay
+- ✅ Reduces overall sync time
+
+**Cons**:
+- ⚠️ Most complex solution
+- ⚠️ May have limited optimization potential
+
+## 📈 Additional Metrics Needed
+
+### 1. MerkleChangeSets Performance
+Add to `crates/stages/stages/src/stages/merkle_changesets.rs`:
+
+```rust
+#[cfg(feature = "metrics")]
+use reth_metrics::{metrics::{Counter, Histogram, Gauge}, Metrics};
+
+#[derive(Metrics)]
+#[metrics(scope = "stages.merkle_changesets")]
+struct MerkleChangeSetsMetrics {
+    /// Time to execute stage
+    execution_duration: Histogram,
+    /// Blocks processed per execution
+    blocks_per_execution: Histogram,
+    /// Current checkpoint block
+    checkpoint_block: Gauge,
+    /// Checkpoint lag (tip - checkpoint)
+    checkpoint_lag: Gauge,
+}
+```
+
+**Add instrumentation**:
+```rust
+fn execute(&mut self, provider: &Provider, input: ExecInput) -> Result<ExecOutput, StageError> {
+    #[cfg(feature = "metrics")]
+    let _timer = start_timer(&self.metrics.execution_duration);
+
+    let target_range = self.determine_target_range(provider)?;
+    let blocks_count = target_range.end.saturating_sub(target_range.start);
+
+    #[cfg(feature = "metrics")]
+    self.metrics.blocks_per_execution.record(blocks_count as f64);
+
+    // ... rest of execution
+
+    #[cfg(feature = "metrics")]
+    {
+        self.metrics.checkpoint_block.set(checkpoint as f64);
+        let tip = provider.best_block_number()?;
+        self.metrics.checkpoint_lag.set(tip.saturating_sub(checkpoint) as f64);
+    }
+}
+```
+
+### 2. Overlay Usage Tracking
+Add to `crates/engine/tree/src/tree/payload_validator.rs`:
+
+```rust
+#[cfg(feature = "metrics")]
+use reth_metrics::metrics::Counter;
+
+// Track when overlay is created with block_hash
+#[cfg(feature = "metrics")]
+static OVERLAY_WITH_BLOCK_HASH: Counter =
+    Counter::new("engine_payload_validator_overlay_with_block_hash");
+
+// Before creating factory:
+#[cfg(feature = "metrics")]
+OVERLAY_WITH_BLOCK_HASH.increment(1);
+```
+
+### 3. Grafana Alerts
+
+```yaml
+# Checkpoint lag alert
+- alert: MerkleChangeSetsLagging
+  expr: |
+    (reth_best_block_number
+     - ignoring(stage) reth_sync_checkpoint{stage="MerkleChangeSets"}) > 100
+  for: 5m
+  annotations:
+    summary: "Checkpoint >100 blocks behind"
+
+# High revert rate alert
+- alert: OverlayRevertsFrequent
+  expr: |
+    rate(reth_storage_overlay_state_provider_reverts_required[5m]) > 5
+  for: 5m
+  annotations:
+    summary: "Overlay reverts at {{ $value }} req/s"
+```
+
+## 🎯 Recommended Action Plan
+
+### Phase 1: Add Metrics (Week 1)
+1. ✅ Add MerkleChangeSets stage metrics
+2. ✅ Add checkpoint lag gauge
+3. ✅ Add overlay usage tracking
+4. ✅ Deploy and collect baseline data
+
+### Phase 2: Quick Win (Week 2)
+5. 🎯 **Reduce MerkleChangeSets batch size** from 64 to 20 blocks
+6. 📊 Measure impact:
+   - Checkpoint lag should drop to 0-20 blocks
+   - Revert duration should drop to ~100-200ms
+   - Monitor overall throughput for regression
+
+### Phase 3: Optimize (Week 3-4, if needed)
+7. If Phase 2 insufficient:
+   - Profile MerkleChangeSets execution
+   - Consider async revert fetching
+   - Consider LRU cache for recent ranges
+
+### Phase 4: Production Validation (Week 4)
+8. Compare metrics in production vs test
+9. Validate solution works under real load
+10. Document final performance characteristics
+
+## 📝 Key Takeaways
+
+1. ✅ **Root cause identified**: MerkleChangeSets checkpoint lags 45-74 blocks
+2. ⚠️ **Not reth-bench specific**: Happens in production Engine API validation
+3. ✅ **Impact is measurable but limited**: 0.4% of calls, doesn't affect state root
+4. 🎯 **Solution is clear**: Run MerkleChangeSets more frequently
+5. 📊 **Need better observability**: Add stage performance metrics
+
+## 🔗 Related Code Locations
+
+- Overlay provider: `crates/storage/provider/src/providers/state/overlay.rs`
+- Overlay metrics: `crates/storage/provider/src/providers/state/overlay_metrics.rs`
+- Payload validator: `crates/engine/tree/src/tree/payload_validator.rs:655,785`
+- MerkleChangeSets stage: `crates/stages/stages/src/stages/merkle_changesets.rs`
+- Dashboard: `dashboard.json` (panels 303-311)
--- a/crates/stages/stages/src/stages/merkle_changesets.rs
+++ b/crates/stages/stages/src/stages/merkle_changesets.rs
@@ -44,6 +44,18 @@ struct MerkleChangeSetsMetrics {

    /// Number of stage executions
    execution_count: Counter,
+
+    /// Total state revert items collected per execution
+    state_reverts_collected: Histogram,
+
+    /// Total trie revert items collected per execution
+    trie_reverts_collected: Histogram,
+
+    /// Average state reverts per block
+    state_reverts_per_block: Histogram,
+
+    /// Average trie reverts per block
+    trie_reverts_per_block: Histogram,
 }

 /// The `MerkleChangeSets` stage.
@@ -58,6 +70,13 @@ pub struct MerkleChangeSets {
    metrics: MerkleChangeSetsMetrics,
 }

+/// Tallies of revert items gathered by `populate_range`, which `execute` records as stage metrics.
+#[derive(Debug, Default)]
+struct RevertCounts {
+    total_state_reverts: usize, // accounts + storage slots, summed over every block in the range
+    total_trie_reverts: usize, // `total_len()` of each block's sorted trie updates, summed
+}
+
 impl MerkleChangeSets {
    /// Creates a new `MerkleChangeSets` stage with default retention blocks of 64.
    pub const fn new() -> Self {
@@ -200,7 +219,7 @@ impl MerkleChangeSets {
    fn populate_range<Provider>(
        provider: &Provider,
        target_range: Range<BlockNumber>,
-    ) -> Result<(), StageError>
+    ) -> Result<RevertCounts, StageError>
    where
        Provider: StageCheckpointReader
            + TrieWriter
@@ -238,11 +257,19 @@ impl MerkleChangeSets {
            "Computing per-block state reverts",
        );
        let mut per_block_state_reverts = Vec::new();
+        let mut revert_counts = RevertCounts::default();
+
        for block_number in target_range.clone() {
-            per_block_state_reverts.push(HashedPostState::from_reverts::<KeccakKeyHasher>(
+            let state_revert = HashedPostState::from_reverts::<KeccakKeyHasher>(
                provider.tx_ref(),
                block_number..=block_number,
-            )?);
+            )?;
+
+            // Count state reverts (accounts + storage slots)
+            revert_counts.total_state_reverts += state_revert.accounts.len() +
+                state_revert.storages.values().map(|s| s.storage.len()).sum::<usize>();
+
+            per_block_state_reverts.push(state_revert);
        }

        // Helper to retrieve state revert data for a specific block from the pre-computed array
@@ -305,6 +332,9 @@ impl MerkleChangeSets {
            input.nodes.extend_ref(&this_trie_updates);
            let this_trie_updates = this_trie_updates.into_sorted();

+            // Count trie reverts
+            revert_counts.total_trie_reverts += this_trie_updates.total_len();
+
            // Write the changesets to the DB using the trie updates produced by the block, and the
            // trie reverts as the overlay.
            debug!(
@@ -319,7 +349,7 @@ impl MerkleChangeSets {
            )?;
        }

-        Ok(())
+        Ok(revert_counts)
    }
 }

@@ -412,7 +442,7 @@ where
        self.metrics.blocks_per_execution.record(blocks_count as f64);

        // Populate the target range with changesets
-        Self::populate_range(provider, target_range)?;
+        let revert_counts = Self::populate_range(provider, target_range)?;

        // Update the prune checkpoint to reflect that all data before `computed_range.start`
        // is not available.
@@ -446,6 +476,18 @@ where
                let lag = tip.saturating_sub(checkpoint.block_number);
                self.metrics.checkpoint_lag.set(lag as f64);
            }
+
+            // Record revert collection metrics
+            self.metrics.state_reverts_collected.record(revert_counts.total_state_reverts as f64);
+            self.metrics.trie_reverts_collected.record(revert_counts.total_trie_reverts as f64);
+
+            if blocks_count > 0 {
+                let state_per_block =
+                    revert_counts.total_state_reverts as f64 / blocks_count as f64;
+                let trie_per_block = revert_counts.total_trie_reverts as f64 / blocks_count as f64;
+                self.metrics.state_reverts_per_block.record(state_per_block);
+                self.metrics.trie_reverts_per_block.record(trie_per_block);
+            }
        }

        Ok(ExecOutput::done(checkpoint))
--- a/dashboard.json
+++ b/dashboard.json
@@ -6503,6 +6503,190 @@
      "title": "MerkleChangeSets - Blocks Per Execution",
      "type": "timeseries"
    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "description": "Total number of state revert items (accounts + storage slots) collected per execution. Shows workload size.",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "items"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 186
+      },
+      "id": 403,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "reth_stages_merkle_changesets_state_reverts_collected{$instance_label=\"$instance\",quantile=~\"(0.5|0.9|0.95|1)\"}",
+          "legendFormat": "state {{quantile}}",
+          "refId": "A"
+        }
+      ],
+      "title": "MerkleChangeSets - State Reverts Collected",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "description": "Total number of trie revert items (account nodes + storage nodes) collected per execution. Shows trie workload size.",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "items"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 186
+      },
+      "id": 404,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "expr": "reth_stages_merkle_changesets_trie_reverts_collected{$instance_label=\"$instance\",quantile=~\"(0.5|0.9|0.95|1)\"}",
+          "legendFormat": "trie {{quantile}}",
+          "refId": "A"
+        }
+      ],
+      "title": "MerkleChangeSets - Trie Reverts Collected",
+      "type": "timeseries"
+    },
    {
      "datasource": {
        "type": "prometheus",