ci: reth-bench (#22134)

Alexey Shekhirin authored 2026-02-17 16:47:47 +00:00, committed by GitHub
parent a9a6044bc5
commit 719bbc2543
5 changed files with 1176 additions and 2 deletions

.github/scripts/bench-reth-build.sh vendored Executable file

@@ -0,0 +1,66 @@
#!/usr/bin/env bash
#
# Builds (or fetches from cache) reth binaries for benchmarking.
#
# Usage: bench-reth-build.sh <main|branch> <commit> [branch-sha]
#
# main — build/fetch the baseline binary at <commit> (merge-base)
# branch — build/fetch the candidate binary + reth-bench at <commit>
# optional branch-sha is the PR head commit used as the cache key
#
# Outputs:
# main: target/profiling-baseline/reth
# branch: target/profiling/reth, reth-bench installed to cargo bin
#
# Required: mc (MinIO client) configured at /home/ubuntu/.mc
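#
# Example (as invoked from the bench workflow; SHAs are placeholders):
#   bench-reth-build.sh main   <merge-base-sha>
#   bench-reth-build.sh branch <pr-head-sha>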
set -euo pipefail
MC="mc --config-dir /home/ubuntu/.mc"
MODE="$1"
COMMIT="$2"
case "$MODE" in
main)
BUCKET="minio/reth-binaries/${COMMIT}"
mkdir -p target/profiling-baseline
if $MC stat "${BUCKET}/reth" &>/dev/null; then
echo "Cache hit for main (${COMMIT}), downloading binary..."
$MC cp "${BUCKET}/reth" target/profiling-baseline/reth
chmod +x target/profiling-baseline/reth
else
echo "Cache miss for main (${COMMIT}), building from source..."
CURRENT_REF=$(git rev-parse HEAD)
git checkout "${COMMIT}"
cargo build --profile profiling --bin reth
cp target/profiling/reth target/profiling-baseline/reth
$MC cp target/profiling-baseline/reth "${BUCKET}/reth"
git checkout "${CURRENT_REF}"
fi
;;
branch)
BRANCH_SHA="${3:-$COMMIT}"
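# Cache is keyed on the PR head SHA (falls back to the benchmarked commit).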
BUCKET="minio/reth-binaries/${BRANCH_SHA}"
if $MC stat "${BUCKET}/reth" &>/dev/null && $MC stat "${BUCKET}/reth-bench" &>/dev/null; then
echo "Cache hit for ${BRANCH_SHA}, downloading binaries..."
mkdir -p target/profiling
$MC cp "${BUCKET}/reth" target/profiling/reth
$MC cp "${BUCKET}/reth-bench" /home/ubuntu/.cargo/bin/reth-bench
chmod +x target/profiling/reth /home/ubuntu/.cargo/bin/reth-bench
else
echo "Cache miss for ${BRANCH_SHA}, building from source..."
rustup show active-toolchain || rustup default stable
make profiling
make install-reth-bench
$MC cp target/profiling/reth "${BUCKET}/reth"
$MC cp "$(which reth-bench)" "${BUCKET}/reth-bench"
fi
;;
*)
echo "Usage: $0 <main|branch> <commit> [branch-sha]"
exit 1
;;
esac

.github/scripts/bench-reth-charts.py vendored Normal file

@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""Generate benchmark charts from reth-bench CSV output.
Usage:
    bench-reth-charts.py <combined_csv> --output-dir <dir> [--baseline <baseline_csv>] [--baseline-name <name>] [--branch-name <name>]
Generates three PNG charts:
1. newPayload latency + Ggas/s per block (+ latency diff when baseline present)
2. Wait breakdown (persistence, execution cache, sparse trie) per block
3. Scatter plot of gas used vs latency
When --baseline is provided, charts overlay both datasets for comparison.
"""
import argparse
import csv
import sys
from pathlib import Path
import numpy as np
try:
import matplotlib
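    # Non-interactive backend so charts render on headless CI runners.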
matplotlib.use("Agg")
import matplotlib.pyplot as plt
except ImportError:
print("matplotlib is required: pip install matplotlib", file=sys.stderr)
sys.exit(1)
GIGAGAS = 1_000_000_000
def parse_combined_csv(path: str) -> list[dict]:
rows = []
with open(path) as f:
reader = csv.DictReader(f)
for row in reader:
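            # Latency and wait columns are in microseconds; persistence_wait may be blank.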
rows.append(
{
"block_number": int(row["block_number"]),
"gas_used": int(row["gas_used"]),
"new_payload_latency_us": int(row["new_payload_latency"]),
"persistence_wait_us": int(row["persistence_wait"])
if row.get("persistence_wait")
else None,
"execution_cache_wait_us": int(row.get("execution_cache_wait", 0)),
"sparse_trie_wait_us": int(row.get("sparse_trie_wait", 0)),
}
)
return rows
def plot_latency_and_throughput(
feature: list[dict], baseline: list[dict] | None, out: Path,
baseline_name: str = "main", branch_name: str = "branch",
):
num_plots = 3 if baseline else 2
fig, axes = plt.subplots(num_plots, 1, figsize=(12, 4 * num_plots), sharex=True)
ax1, ax2 = axes[0], axes[1]
feat_x = [r["block_number"] for r in feature]
feat_lat = [r["new_payload_latency_us"] / 1_000 for r in feature]
feat_ggas = []
for r in feature:
lat_s = r["new_payload_latency_us"] / 1_000_000
feat_ggas.append(r["gas_used"] / lat_s / GIGAGAS if lat_s > 0 else 0)
if baseline:
base_x = [r["block_number"] for r in baseline]
base_lat = [r["new_payload_latency_us"] / 1_000 for r in baseline]
base_ggas = []
for r in baseline:
lat_s = r["new_payload_latency_us"] / 1_000_000
base_ggas.append(r["gas_used"] / lat_s / GIGAGAS if lat_s > 0 else 0)
ax1.plot(base_x, base_lat, linewidth=0.8, label=baseline_name, alpha=0.7)
ax2.plot(base_x, base_ggas, linewidth=0.8, label=baseline_name, alpha=0.7)
ax1.plot(feat_x, feat_lat, linewidth=0.8, label=branch_name)
ax1.set_ylabel("Latency (ms)")
ax1.set_title("newPayload Latency per Block")
ax1.grid(True, alpha=0.3)
if baseline:
ax1.legend()
ax2.plot(feat_x, feat_ggas, linewidth=0.8, label=branch_name)
ax2.set_ylabel("Ggas/s")
ax2.set_title("Execution Throughput per Block")
ax2.grid(True, alpha=0.3)
if baseline:
ax2.legend()
if baseline:
ax3 = axes[2]
base_by_block = {r["block_number"]: r["new_payload_latency_us"] for r in baseline}
blocks, diffs = [], []
for r in feature:
bn = r["block_number"]
if bn in base_by_block and base_by_block[bn] > 0:
pct = (r["new_payload_latency_us"] - base_by_block[bn]) / base_by_block[bn] * 100
blocks.append(bn)
diffs.append(pct)
if blocks:
colors = ["green" if d <= 0 else "red" for d in diffs]
ax3.bar(blocks, diffs, width=1.0, color=colors, alpha=0.7, edgecolor="none")
ax3.axhline(0, color="black", linewidth=0.5)
ax3.set_ylabel("Δ Latency (%)")
ax3.set_title("Per-Block newPayload Latency Change (branch vs main)")
ax3.grid(True, alpha=0.3, axis="y")
axes[-1].set_xlabel("Block Number")
fig.tight_layout()
fig.savefig(out, dpi=150)
plt.close(fig)
def plot_wait_breakdown(
feature: list[dict], baseline: list[dict] | None, out: Path,
baseline_name: str = "main", branch_name: str = "branch",
):
series = [
("Persistence Wait", "persistence_wait_us"),
("State Cache Wait", "execution_cache_wait_us"),
("Trie Cache Wait", "sparse_trie_wait_us"),
]
fig, axes = plt.subplots(len(series), 1, figsize=(12, 3 * len(series)), sharex=True)
for ax, (label, key) in zip(axes, series):
if baseline:
bx = [r["block_number"] for r in baseline if r[key] is not None]
by = [r[key] / 1_000 for r in baseline if r[key] is not None]
if bx:
ax.plot(bx, by, linewidth=0.8, label=baseline_name, alpha=0.7)
fx = [r["block_number"] for r in feature if r[key] is not None]
fy = [r[key] / 1_000 for r in feature if r[key] is not None]
if fx:
ax.plot(fx, fy, linewidth=0.8, label=branch_name)
ax.set_ylabel("ms")
ax.set_title(label)
ax.grid(True, alpha=0.3)
if baseline:
ax.legend()
axes[-1].set_xlabel("Block Number")
fig.suptitle("Wait Time Breakdown per Block", fontsize=14, y=1.01)
fig.tight_layout()
fig.savefig(out, dpi=150, bbox_inches="tight")
plt.close(fig)
def _add_regression(ax, x, y, color, label):
"""Add a linear regression line to the axes."""
if len(x) < 2:
return
xa, ya = np.array(x), np.array(y)
m, b = np.polyfit(xa, ya, 1)
x_range = np.linspace(xa.min(), xa.max(), 100)
ax.plot(x_range, m * x_range + b, color=color, linewidth=1.5, alpha=0.8,
label=label)
def plot_gas_vs_latency(
feature: list[dict], baseline: list[dict] | None, out: Path,
baseline_name: str = "main", branch_name: str = "branch",
):
fig, ax = plt.subplots(figsize=(8, 6))
if baseline:
bgas = [r["gas_used"] / 1_000_000 for r in baseline]
blat = [r["new_payload_latency_us"] / 1_000 for r in baseline]
        ax.scatter(bgas, blat, s=8, alpha=0.5, color="tab:blue")
_add_regression(ax, bgas, blat, "tab:blue", baseline_name)
fgas = [r["gas_used"] / 1_000_000 for r in feature]
flat = [r["new_payload_latency_us"] / 1_000 for r in feature]
    ax.scatter(fgas, flat, s=8, alpha=0.6, color="tab:orange")
_add_regression(ax, fgas, flat, "tab:orange", branch_name)
ax.set_xlabel("Gas Used (Mgas)")
ax.set_ylabel("newPayload Latency (ms)")
ax.set_title("Gas Used vs Latency")
ax.grid(True, alpha=0.3)
ax.legend()
fig.tight_layout()
fig.savefig(out, dpi=150)
plt.close(fig)
def main():
parser = argparse.ArgumentParser(description="Generate benchmark charts")
parser.add_argument("combined_csv", help="Path to combined_latency.csv (feature)")
parser.add_argument(
"--output-dir", required=True, help="Output directory for PNG charts"
)
parser.add_argument(
"--baseline", help="Path to baseline (main) combined_latency.csv"
)
parser.add_argument("--baseline-name", default="main", help="Label for baseline")
parser.add_argument("--branch-name", default="branch", help="Label for branch")
args = parser.parse_args()
feature = parse_combined_csv(args.combined_csv)
if not feature:
print("No results found in combined CSV", file=sys.stderr)
sys.exit(1)
baseline = None
if args.baseline:
baseline = parse_combined_csv(args.baseline)
if not baseline:
print(
"Warning: no results in baseline CSV, skipping comparison",
file=sys.stderr,
)
baseline = None
out_dir = Path(args.output_dir)
out_dir.mkdir(parents=True, exist_ok=True)
bname = args.baseline_name
fname = args.branch_name
plot_latency_and_throughput(feature, baseline, out_dir / "latency_throughput.png", bname, fname)
plot_wait_breakdown(feature, baseline, out_dir / "wait_breakdown.png", bname, fname)
plot_gas_vs_latency(feature, baseline, out_dir / "gas_vs_latency.png", bname, fname)
print(f"Charts written to {out_dir}")
if __name__ == "__main__":
main()

.github/scripts/bench-reth-run.sh vendored Executable file

@@ -0,0 +1,94 @@
#!/usr/bin/env bash
#
# Runs a single reth-bench cycle: mount snapshot → start node → warmup →
# benchmark → stop node → recover snapshot.
#
# Usage: bench-reth-run.sh <label> <binary> <output-dir>
#
# Required env: SCHELK_MOUNT, BENCH_RPC_URL, BENCH_BLOCKS, BENCH_WARMUP_BLOCKS
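#
# Example (as invoked from the bench workflow):
#   bench-reth-run.sh baseline target/profiling-baseline/reth /tmp/bench-results-baseline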
set -euo pipefail
LABEL="$1"
BINARY="$2"
OUTPUT_DIR="$3"
DATADIR="$SCHELK_MOUNT/datadir"
LOG="/tmp/reth-bench-node-${LABEL}.log"
cleanup() {
kill "$TAIL_PID" 2>/dev/null || true
if [ -n "${RETH_PID:-}" ] && sudo kill -0 "$RETH_PID" 2>/dev/null; then
sudo kill "$RETH_PID"
for i in $(seq 1 30); do
sudo kill -0 "$RETH_PID" 2>/dev/null || break
sleep 1
done
sudo kill -9 "$RETH_PID" 2>/dev/null || true
fi
mountpoint -q "$SCHELK_MOUNT" && sudo schelk recover -y || true
}
TAIL_PID=
trap cleanup EXIT
# Mount
sudo schelk mount -y
sync
sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'
echo "=== Cache state after drop ==="
free -h
grep Cached /proc/meminfo
# Start reth
# CPU layout: core 0 = OS/IRQs/reth-bench/aux, cores 1+ = reth node
RETH_BENCH="$(which reth-bench)"
ONLINE=$(nproc --all)
RETH_CPUS="1-$(( ONLINE - 1 ))"
sudo taskset -c "$RETH_CPUS" nice -n -20 "$BINARY" node \
--datadir "$DATADIR" \
--engine.accept-execution-requests-hash \
--http \
--http.port 8545 \
--ws \
--ws.api all \
--authrpc.port 8551 \
--disable-discovery \
--no-persist-peers \
> "$LOG" 2>&1 &
RETH_PID=$!
stdbuf -oL tail -f "$LOG" | sed -u "s/^/[reth] /" &
TAIL_PID=$!
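# Wait for the node to answer JSON-RPC (eth_blockNumber) before benchmarking, up to 60s.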
for i in $(seq 1 60); do
if curl -sf http://127.0.0.1:8545 -X POST \
-H 'Content-Type: application/json' \
-d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' \
> /dev/null 2>&1; then
echo "reth (${LABEL}) is ready after ${i}s"
break
fi
if [ "$i" -eq 60 ]; then
echo "::error::reth (${LABEL}) failed to start within 60s"
cat "$LOG"
exit 1
fi
sleep 1
done
# Warmup
sudo nice -n -20 "$RETH_BENCH" new-payload-fcu \
--rpc-url "$BENCH_RPC_URL" \
--engine-rpc-url http://127.0.0.1:8551 \
--jwt-secret "$DATADIR/jwt.hex" \
--advance "${BENCH_WARMUP_BLOCKS:-50}" \
--reth-new-payload 2>&1 | sed -u "s/^/[bench] /"
# Benchmark
sudo nice -n -20 "$RETH_BENCH" new-payload-fcu \
--rpc-url "$BENCH_RPC_URL" \
--engine-rpc-url http://127.0.0.1:8551 \
--jwt-secret "$DATADIR/jwt.hex" \
--advance "$BENCH_BLOCKS" \
--reth-new-payload \
--output "$OUTPUT_DIR" 2>&1 | sed -u "s/^/[bench] /"
# cleanup runs via trap

.github/scripts/bench-reth-summary.py vendored Executable file

@@ -0,0 +1,415 @@
#!/usr/bin/env python3
"""Parse reth-bench CSV output and generate a summary JSON + markdown comparison.
Usage:
    bench-reth-summary.py \
        --baseline-csv <baseline_combined.csv>... \
        --branch-csv <branch_combined.csv>... \
        --gas-csv <total_gas.csv> \
        --output-summary <summary.json> \
        --output-markdown <comment.md> \
        [--repo <owner/repo>] \
        [--baseline-ref <sha>] \
        [--branch-name <name>] \
        [--branch-sha <sha>] \
        [--behind-main <n>]
Generates a paired statistical comparison between baseline (main) and branch.
Matches blocks by number and computes per-block diffs to cancel out gas
variance. Fails if baseline or branch CSV is missing or empty.
"""
import argparse
import csv
import json
import math
import random
import sys
GIGAGAS = 1_000_000_000
T_CRITICAL = 1.96 # two-tailed 95% confidence
BOOTSTRAP_ITERATIONS = 10_000
def parse_combined_csv(path: str) -> list[dict]:
"""Parse combined_latency.csv into a list of per-block dicts."""
rows = []
with open(path) as f:
reader = csv.DictReader(f)
for row in reader:
rows.append(
{
"block_number": int(row["block_number"]),
"gas_used": int(row["gas_used"]),
"gas_limit": int(row["gas_limit"]),
"transaction_count": int(row["transaction_count"]),
"new_payload_latency_us": int(row["new_payload_latency"]),
"fcu_latency_us": int(row["fcu_latency"]),
"total_latency_us": int(row["total_latency"]),
"persistence_wait_us": int(row["persistence_wait"])
if row.get("persistence_wait")
else None,
"execution_cache_wait_us": int(row.get("execution_cache_wait", 0)),
"sparse_trie_wait_us": int(row.get("sparse_trie_wait", 0)),
}
)
return rows
def parse_gas_csv(path: str) -> list[dict]:
"""Parse total_gas.csv into a list of per-block dicts."""
rows = []
with open(path) as f:
reader = csv.DictReader(f)
for row in reader:
rows.append(
{
"block_number": int(row["block_number"]),
"gas_used": int(row["gas_used"]),
"time_us": int(row["time"]),
}
)
return rows
def stddev(values: list[float], mean: float) -> float:
if len(values) < 2:
return 0.0
return math.sqrt(sum((v - mean) ** 2 for v in values) / (len(values) - 1))
def percentile(sorted_vals: list[float], pct: int) -> float:
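    # Simple index-based percentile (floor of pct% of n); expects sorted_vals in ascending order.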
if not sorted_vals:
return 0.0
idx = int(len(sorted_vals) * pct / 100)
idx = min(idx, len(sorted_vals) - 1)
return sorted_vals[idx]
def compute_stats(combined: list[dict]) -> dict:
"""Compute per-run statistics from parsed CSV data."""
n = len(combined)
if n == 0:
return {}
latencies_ms = [r["new_payload_latency_us"] / 1_000 for r in combined]
sorted_lat = sorted(latencies_ms)
mean_lat = sum(latencies_ms) / n
std_lat = stddev(latencies_ms, mean_lat)
mgas_s_values = []
for r in combined:
lat_s = r["new_payload_latency_us"] / 1_000_000
if lat_s > 0:
mgas_s_values.append(r["gas_used"] / lat_s / 1_000_000)
mean_mgas_s = sum(mgas_s_values) / len(mgas_s_values) if mgas_s_values else 0
return {
"n": n,
"mean_ms": mean_lat,
"stddev_ms": std_lat,
"p50_ms": percentile(sorted_lat, 50),
"p90_ms": percentile(sorted_lat, 90),
"p99_ms": percentile(sorted_lat, 99),
"mean_mgas_s": mean_mgas_s,
}
def _paired_data(
baseline: list[dict], branch: list[dict]
) -> tuple[list[tuple[float, float]], list[float], list[float]]:
"""Match blocks and return paired latencies and per-block diffs.
Returns:
pairs: list of (baseline_ms, branch_ms) tuples
        lat_diffs_ms: list of branch - baseline latency diffs in ms
        mgas_diffs: list of branch - baseline Mgas/s diffs
"""
baseline_by_block = {r["block_number"]: r for r in baseline}
branch_by_block = {r["block_number"]: r for r in branch}
common_blocks = sorted(set(baseline_by_block) & set(branch_by_block))
pairs = []
lat_diffs_ms = []
mgas_diffs = []
for bn in common_blocks:
b = baseline_by_block[bn]
f = branch_by_block[bn]
b_ms = b["new_payload_latency_us"] / 1_000
f_ms = f["new_payload_latency_us"] / 1_000
pairs.append((b_ms, f_ms))
lat_diffs_ms.append(f_ms - b_ms)
b_lat_s = b["new_payload_latency_us"] / 1_000_000
f_lat_s = f["new_payload_latency_us"] / 1_000_000
if b_lat_s > 0 and f_lat_s > 0:
mgas_diffs.append(
f["gas_used"] / f_lat_s / 1_000_000
- b["gas_used"] / b_lat_s / 1_000_000
)
return pairs, lat_diffs_ms, mgas_diffs
def compute_paired_stats(
baseline_runs: list[list[dict]],
branch_runs: list[list[dict]],
) -> dict:
"""Compute paired statistics between baseline and branch runs.
Each pair (baseline_runs[i], branch_runs[i]) produces per-block diffs.
All diffs are pooled for the final CI.
"""
all_pairs = []
all_lat_diffs = []
all_mgas_diffs = []
for baseline, branch in zip(baseline_runs, branch_runs):
pairs, lat_diffs, mgas_diffs = _paired_data(baseline, branch)
all_pairs.extend(pairs)
all_lat_diffs.extend(lat_diffs)
all_mgas_diffs.extend(mgas_diffs)
if not all_lat_diffs:
return {}
n = len(all_lat_diffs)
mean_diff = sum(all_lat_diffs) / n
std_diff = stddev(all_lat_diffs, mean_diff)
se = std_diff / math.sqrt(n) if n > 0 else 0.0
ci = T_CRITICAL * se
# Bootstrap CI on difference-of-percentiles (resample paired blocks)
base_lats = sorted([p[0] for p in all_pairs])
branch_lats = sorted([p[1] for p in all_pairs])
p50_diff = percentile(branch_lats, 50) - percentile(base_lats, 50)
p90_diff = percentile(branch_lats, 90) - percentile(base_lats, 90)
p99_diff = percentile(branch_lats, 99) - percentile(base_lats, 99)
rng = random.Random(42)
p50_boot, p90_boot, p99_boot = [], [], []
for _ in range(BOOTSTRAP_ITERATIONS):
sample = rng.choices(all_pairs, k=n)
b_sorted = sorted(p[0] for p in sample)
f_sorted = sorted(p[1] for p in sample)
p50_boot.append(percentile(f_sorted, 50) - percentile(b_sorted, 50))
p90_boot.append(percentile(f_sorted, 90) - percentile(b_sorted, 90))
p99_boot.append(percentile(f_sorted, 99) - percentile(b_sorted, 99))
p50_boot.sort()
p90_boot.sort()
p99_boot.sort()
lo = int(BOOTSTRAP_ITERATIONS * 0.025)
hi = int(BOOTSTRAP_ITERATIONS * 0.975)
mean_mgas_diff = sum(all_mgas_diffs) / len(all_mgas_diffs) if all_mgas_diffs else 0.0
std_mgas_diff = stddev(all_mgas_diffs, mean_mgas_diff) if len(all_mgas_diffs) > 1 else 0.0
mgas_se = std_mgas_diff / math.sqrt(len(all_mgas_diffs)) if all_mgas_diffs else 0.0
mgas_ci = T_CRITICAL * mgas_se
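    # Percentile CIs below are half the width of the 2.5%-97.5% bootstrap interval (a symmetric ± margin).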
return {
"n": n,
"mean_diff_ms": mean_diff,
"ci_ms": ci,
"p50_diff_ms": p50_diff,
"p50_ci_ms": (p50_boot[hi] - p50_boot[lo]) / 2,
"p90_diff_ms": p90_diff,
"p90_ci_ms": (p90_boot[hi] - p90_boot[lo]) / 2,
"p99_diff_ms": p99_diff,
"p99_ci_ms": (p99_boot[hi] - p99_boot[lo]) / 2,
"mean_mgas_diff": mean_mgas_diff,
"mgas_ci": mgas_ci,
}
def compute_summary(combined: list[dict], gas: list[dict]) -> dict:
"""Compute aggregate metrics from parsed CSV data."""
blocks = len(combined)
return {
"blocks": blocks,
}
def format_duration(seconds: float) -> str:
if seconds >= 60:
return f"{seconds / 60:.1f}min"
return f"{seconds}s"
def format_gas(gas: int) -> str:
if gas >= GIGAGAS:
return f"{gas / GIGAGAS:.1f}G"
if gas >= 1_000_000:
return f"{gas / 1_000_000:.1f}M"
return f"{gas:,}"
def fmt_ms(v: float) -> str:
return f"{v:.2f}ms"
def fmt_mgas(v: float) -> str:
return f"{v:.2f}"
def change_str(pct: float, ci_pct: float, lower_is_better: bool) -> str:
    """Format change% with paired CI significance.
    Significant if the CI doesn't cross zero (i.e. |pct| > ci_pct).
    """
    significant = abs(pct) > ci_pct
    if not significant:
        emoji = ""
    elif (pct < 0) == lower_is_better:
        emoji = "🟢 "
    else:
        emoji = "🔴 "
    return f"{pct:+.2f}% {emoji}(±{ci_pct:.2f}%)"
def generate_comparison_table(
run1: dict,
run2: dict,
paired: dict,
repo: str,
baseline_ref: str,
branch_name: str,
branch_sha: str,
) -> str:
"""Generate a markdown comparison table between baseline (main) and branch."""
n = paired["n"]
def pct(base: float, feat: float) -> float:
return (feat - base) / base * 100.0 if base > 0 else 0.0
mean_pct = pct(run1["mean_ms"], run2["mean_ms"])
gas_pct = pct(run1["mean_mgas_s"], run2["mean_mgas_s"])
p50_pct = pct(run1["p50_ms"], run2["p50_ms"])
p90_pct = pct(run1["p90_ms"], run2["p90_ms"])
p99_pct = pct(run1["p99_ms"], run2["p99_ms"])
# Bootstrap CIs as % of baseline percentile
p50_ci_pct = paired["p50_ci_ms"] / run1["p50_ms"] * 100.0 if run1["p50_ms"] > 0 else 0.0
p90_ci_pct = paired["p90_ci_ms"] / run1["p90_ms"] * 100.0 if run1["p90_ms"] > 0 else 0.0
p99_ci_pct = paired["p99_ci_ms"] / run1["p99_ms"] * 100.0 if run1["p99_ms"] > 0 else 0.0
# CI as a percentage of baseline mean
lat_ci_pct = paired["ci_ms"] / run1["mean_ms"] * 100.0 if run1["mean_ms"] > 0 else 0.0
mgas_ci_pct = paired["mgas_ci"] / run1["mean_mgas_s"] * 100.0 if run1["mean_mgas_s"] > 0 else 0.0
base_url = f"https://github.com/{repo}/commit"
baseline_label = f"[`main`]({base_url}/{baseline_ref})"
branch_label = f"[`{branch_name}`]({base_url}/{branch_sha})"
lines = [
f"| Metric | {baseline_label} | {branch_label} | Change |",
"|--------|------|--------|--------|",
f"| Mean | {fmt_ms(run1['mean_ms'])} | {fmt_ms(run2['mean_ms'])} | {change_str(mean_pct, lat_ci_pct, lower_is_better=True)} |",
f"| StdDev | {fmt_ms(run1['stddev_ms'])} | {fmt_ms(run2['stddev_ms'])} | |",
f"| P50 | {fmt_ms(run1['p50_ms'])} | {fmt_ms(run2['p50_ms'])} | {change_str(p50_pct, p50_ci_pct, lower_is_better=True)} |",
f"| P90 | {fmt_ms(run1['p90_ms'])} | {fmt_ms(run2['p90_ms'])} | {change_str(p90_pct, p90_ci_pct, lower_is_better=True)} |",
f"| P99 | {fmt_ms(run1['p99_ms'])} | {fmt_ms(run2['p99_ms'])} | {change_str(p99_pct, p99_ci_pct, lower_is_better=True)} |",
f"| Mgas/s | {fmt_mgas(run1['mean_mgas_s'])} | {fmt_mgas(run2['mean_mgas_s'])} | {change_str(gas_pct, mgas_ci_pct, lower_is_better=False)} |",
"",
f"*{n} blocks*",
]
return "\n".join(lines)
def generate_markdown(
summary: dict, comparison_table: str,
behind_main: int = 0, repo: str = "", baseline_ref: str = "",
) -> str:
"""Generate a markdown comment body."""
lines = ["## Benchmark Results", "", comparison_table]
if behind_main > 0:
s = "s" if behind_main > 1 else ""
diff_link = f"https://github.com/{repo}/compare/{baseline_ref[:12]}...main"
lines.append("")
lines.append(f"> ⚠️ Branch is [**{behind_main} commit{s} behind `main`**]({diff_link}). Consider rebasing for accurate results.")
return "\n".join(lines)
def main():
parser = argparse.ArgumentParser(description="Parse reth-bench ABBA results")
parser.add_argument(
"--baseline-csv", nargs="+", required=True,
help="Baseline combined_latency.csv files (A1, A2)",
)
parser.add_argument(
"--branch-csv", nargs="+", required=True,
help="Branch combined_latency.csv files (B1, B2)",
)
parser.add_argument("--gas-csv", required=True, help="Path to total_gas.csv")
parser.add_argument(
"--output-summary", required=True, help="Output JSON summary path"
)
parser.add_argument("--output-markdown", required=True, help="Output markdown path")
parser.add_argument(
"--repo", default="paradigmxyz/reth", help="GitHub repo (owner/name)"
)
parser.add_argument("--baseline-ref", default=None, help="Baseline commit SHA")
parser.add_argument("--branch-name", default=None, help="Branch name")
parser.add_argument("--branch-sha", default=None, help="Branch commit SHA")
parser.add_argument("--behind-main", type=int, default=0, help="Commits behind main")
args = parser.parse_args()
if len(args.baseline_csv) != len(args.branch_csv):
print("Must provide equal number of baseline and branch CSVs", file=sys.stderr)
sys.exit(1)
baseline_runs = []
branch_runs = []
for path in args.baseline_csv:
data = parse_combined_csv(path)
if not data:
print(f"No results in {path}", file=sys.stderr)
sys.exit(1)
baseline_runs.append(data)
for path in args.branch_csv:
data = parse_combined_csv(path)
if not data:
print(f"No results in {path}", file=sys.stderr)
sys.exit(1)
branch_runs.append(data)
gas = parse_gas_csv(args.gas_csv)
all_baseline = [r for run in baseline_runs for r in run]
all_branch = [r for run in branch_runs for r in run]
summary = compute_summary(all_branch, gas)
with open(args.output_summary, "w") as f:
json.dump(summary, f, indent=2)
print(f"Summary written to {args.output_summary}")
baseline_stats = compute_stats(all_baseline)
branch_stats = compute_stats(all_branch)
paired_stats = compute_paired_stats(baseline_runs, branch_runs)
if not paired_stats:
print("No common blocks between baseline and branch runs", file=sys.stderr)
sys.exit(1)
comparison_table = generate_comparison_table(
baseline_stats,
branch_stats,
paired_stats,
repo=args.repo,
baseline_ref=args.baseline_ref or "main",
branch_name=args.branch_name or "branch",
branch_sha=args.branch_sha or "unknown",
)
print(f"Generated comparison ({paired_stats['n']} paired blocks, "
f"mean diff {paired_stats['mean_diff_ms']:+.3f}ms ± {paired_stats['ci_ms']:.3f}ms)")
markdown = generate_markdown(
summary, comparison_table,
behind_main=args.behind_main,
repo=args.repo,
baseline_ref=args.baseline_ref or "",
)
with open(args.output_markdown, "w") as f:
f.write(markdown)
print(f"Markdown written to {args.output_markdown}")
if __name__ == "__main__":
main()


@@ -1,11 +1,25 @@
# Runs benchmarks.
#
# The reth-bench job replays real blocks via the Engine API against a reth node
# backed by a local snapshot managed with schelk.
#
# It runs the main (baseline) binary and the branch (candidate) binary on the
# same block range (snapshot recovered between runs) to compare performance.
on:
pull_request:
# TODO: Disabled temporarily for https://github.com/CodSpeedHQ/runner/issues/55
# merge_group:
push:
branches: [main]
issue_comment:
types: [created, edited]
workflow_dispatch:
inputs:
blocks:
description: "Number of blocks to benchmark"
required: false
default: "50"
type: string
env:
CARGO_TERM_COLOR: always
@@ -14,9 +28,19 @@ env:
RUSTC_WRAPPER: "sccache"
name: bench
permissions:
contents: write
pull-requests: write
concurrency:
group: bench-${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
codspeed:
runs-on: ${{ github.repository == 'paradigmxyz/reth' && 'depot-ubuntu-latest' || 'ubuntu-latest' }}
if: github.event_name != 'issue_comment'
runs-on: depot-ubuntu-latest
strategy:
matrix:
partition: [1, 2]
@@ -31,6 +55,7 @@ jobs:
- uses: actions/checkout@v6
with:
submodules: true
ref: ${{ github.event_name == 'issue_comment' && format('refs/pull/{0}/merge', github.event.issue.number) || '' }}
- uses: rui314/setup-mold@v1
- uses: dtolnay/rust-toolchain@stable
- uses: mozilla-actions/sccache-action@v0.0.9
@@ -49,3 +74,345 @@ jobs:
run: cargo codspeed run ${{ matrix.crates }}
mode: instrumentation
token: ${{ secrets.CODSPEED_TOKEN }}
reth-bench:
if: github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, 'derek bench')
name: reth-bench
runs-on: [self-hosted, Linux, X64]
timeout-minutes: 120
env:
BENCH_RPC_URL: https://ethereum.reth.rs/rpc
SCHELK_MOUNT: /reth-bench
steps:
- name: Check org membership
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const user = context.payload.comment.user.login;
try {
const { status } = await github.rest.orgs.checkMembershipForUser({
org: 'paradigmxyz',
username: user,
});
if (status !== 204 && status !== 302) {
core.setFailed(`@${user} is not a member of paradigmxyz`);
}
} catch (e) {
core.setFailed(`@${user} is not a member of paradigmxyz`);
}
- name: Parse arguments
id: args
uses: actions/github-script@v7
with:
script: |
const body = context.payload.comment.body.trim();
const known = new Set(['blocks', 'warmup']);
const defaults = { blocks: '500', warmup: '100' };
const unknown = [];
const invalid = [];
const args = body.replace(/^derek bench\s*/, '');
for (const part of args.split(/\s+/).filter(Boolean)) {
const eq = part.indexOf('=');
if (eq === -1) {
unknown.push(part);
continue;
}
const key = part.slice(0, eq);
const value = part.slice(eq + 1);
if (!known.has(key)) {
unknown.push(key);
} else if (!/^\d+$/.test(value)) {
invalid.push(`\`${key}=${value}\` (must be a positive integer)`);
} else {
defaults[key] = value;
}
}
const errors = [];
if (unknown.length) errors.push(`Unknown argument(s): \`${unknown.join('`, `')}\``);
if (invalid.length) errors.push(`Invalid value(s): ${invalid.join(', ')}`);
if (errors.length) {
const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`derek bench [blocks=N] [warmup=N]\``;
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: msg,
});
core.setFailed(msg);
return;
}
core.setOutput('blocks', defaults.blocks);
core.setOutput('warmup', defaults.warmup);
core.exportVariable('BENCH_BLOCKS', defaults.blocks);
core.exportVariable('BENCH_WARMUP_BLOCKS', defaults.warmup);
- name: Acknowledge request
id: ack
uses: actions/github-script@v7
with:
script: |
await github.rest.reactions.createForIssueComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: context.payload.comment.id,
content: 'eyes',
});
const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
const blocks = '${{ steps.args.outputs.blocks }}';
const warmup = '${{ steps.args.outputs.warmup }}';
const { data: comment } = await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: `🚀 Benchmark started! [View run](${runUrl})\n\n⏳ **Status:** Building binaries...\n\n**Config:** ${blocks} blocks, ${warmup} warmup blocks`,
});
core.setOutput('comment-id', comment.id);
- uses: actions/checkout@v6
with:
submodules: true
fetch-depth: 0
ref: ${{ format('refs/pull/{0}/merge', github.event.issue.number) }}
- uses: dtolnay/rust-toolchain@stable
- uses: mozilla-actions/sccache-action@v0.0.9
continue-on-error: true
# Verify all required tools are available
- name: Check dependencies
run: |
export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
missing=()
for cmd in mc schelk cpupower taskset stdbuf python3 curl make uv; do
command -v "$cmd" &>/dev/null || missing+=("$cmd")
done
if [ ${#missing[@]} -gt 0 ]; then
echo "::error::Missing required tools: ${missing[*]}"
exit 1
fi
echo "All dependencies found"
# Build binaries
- name: Fetch or build main binaries
run: |
MERGE_BASE=$(git merge-base HEAD origin/main 2>/dev/null || echo "${{ github.sha }}")
.github/scripts/bench-reth-build.sh main "$MERGE_BASE"
- name: Fetch or build branch binaries
run: |
BRANCH_SHA="${{ github.sha }}"
.github/scripts/bench-reth-build.sh branch "$BRANCH_SHA"
# System tuning for reproducible benchmarks
- name: System setup
run: |
sudo cpupower frequency-set -g performance || true
# Disable turbo boost (Intel and AMD paths)
echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo 2>/dev/null || true
echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost 2>/dev/null || true
sudo swapoff -a || true
echo 0 | sudo tee /proc/sys/kernel/randomize_va_space || true
# Disable SMT (hyperthreading)
for cpu in /sys/devices/system/cpu/cpu*/topology/thread_siblings_list; do
first=$(cut -d, -f1 < "$cpu" | cut -d- -f1)
current=$(echo "$cpu" | grep -o 'cpu[0-9]*' | grep -o '[0-9]*')
if [ "$current" != "$first" ]; then
echo 0 | sudo tee "/sys/devices/system/cpu/cpu${current}/online" || true
fi
done
echo "Online CPUs: $(nproc)"
# Disable transparent huge pages (compaction causes latency spikes)
for p in /sys/kernel/mm/transparent_hugepage /sys/kernel/mm/transparent_hugepages; do
[ -d "$p" ] && echo never | sudo tee "$p/enabled" && echo never | sudo tee "$p/defrag" && break
done || true
# Prevent deep C-states (avoids wake-up latency jitter)
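# Holding /dev/cpu_dma_latency open with a value of 0 keeps CPUs in shallow C-states until the background sleep exits.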
sudo sh -c 'exec 3<>/dev/cpu_dma_latency; echo -ne "\x00\x00\x00\x00" >&3; sleep infinity' &
# Move all IRQs to core 0 (housekeeping core)
for irq in /proc/irq/*/smp_affinity_list; do
echo 0 | sudo tee "$irq" 2>/dev/null || true
done
# Stop noisy background services
sudo systemctl stop irqbalance cron atd unattended-upgrades snapd 2>/dev/null || true
# Log environment for reproducibility
echo "=== Benchmark environment ==="
uname -r
lscpu | grep -E 'Model name|CPU\(s\)|MHz|NUMA'
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq
cat /sys/kernel/mm/transparent_hugepage/enabled 2>/dev/null || cat /sys/kernel/mm/transparent_hugepages/enabled 2>/dev/null || echo "THP: unknown"
free -h
# Clean up any leftover state
- name: Pre-flight cleanup
run: |
pkill -9 reth || true
mountpoint -q "$SCHELK_MOUNT" && sudo schelk recover -y || true
- name: Update status (running benchmarks)
if: steps.ack.outputs.comment-id
uses: actions/github-script@v7
with:
script: |
const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: ${{ steps.ack.outputs.comment-id || 0 }},
body: `🚀 Benchmark started! [View run](${runUrl})\n\n⏳ **Status:** Running benchmarks (2 runs)...`,
});
- name: "Run benchmark: baseline"
run: taskset -c 0 .github/scripts/bench-reth-run.sh baseline target/profiling-baseline/reth /tmp/bench-results-baseline
- name: "Run benchmark: branch"
run: taskset -c 0 .github/scripts/bench-reth-run.sh branch target/profiling/reth /tmp/bench-results-branch
# Results & charts
- name: Parse results
id: results
if: success()
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
BRANCH_SHA: ${{ github.sha }}
run: |
git fetch origin main --quiet
# Use the actual PR head commit, not HEAD (which is the merge commit
# refs/pull/N/merge and always has origin/main as a parent).
MERGE_BASE=$(git merge-base "${BRANCH_SHA}" origin/main 2>/dev/null || echo "${{ github.sha }}")
MAIN_HEAD=$(git rev-parse origin/main 2>/dev/null || echo "")
BEHIND_MAIN=0
if [ -n "$MAIN_HEAD" ] && [ "$MERGE_BASE" != "$MAIN_HEAD" ]; then
BEHIND_MAIN=$(git rev-list --count "${MERGE_BASE}..${MAIN_HEAD}" 2>/dev/null || echo "0")
fi
SUMMARY_ARGS="--output-summary /tmp/bench-summary.json"
SUMMARY_ARGS="$SUMMARY_ARGS --output-markdown /tmp/bench-comment.md"
SUMMARY_ARGS="$SUMMARY_ARGS --repo ${{ github.repository }}"
SUMMARY_ARGS="$SUMMARY_ARGS --baseline-ref ${MERGE_BASE}"
SUMMARY_ARGS="$SUMMARY_ARGS --branch-name ${BRANCH_NAME}"
SUMMARY_ARGS="$SUMMARY_ARGS --branch-sha ${BRANCH_SHA}"
SUMMARY_ARGS="$SUMMARY_ARGS --baseline-csv /tmp/bench-results-baseline/combined_latency.csv"
SUMMARY_ARGS="$SUMMARY_ARGS --branch-csv /tmp/bench-results-branch/combined_latency.csv"
SUMMARY_ARGS="$SUMMARY_ARGS --gas-csv /tmp/bench-results-branch/total_gas.csv"
if [ "$BEHIND_MAIN" -gt 0 ]; then
SUMMARY_ARGS="$SUMMARY_ARGS --behind-main $BEHIND_MAIN"
fi
# shellcheck disable=SC2086
python3 .github/scripts/bench-reth-summary.py $SUMMARY_ARGS
- name: Generate charts
if: success()
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
run: |
CHART_ARGS="/tmp/bench-results-branch/combined_latency.csv --output-dir /tmp/bench-charts"
CHART_ARGS="$CHART_ARGS --baseline /tmp/bench-results-baseline/combined_latency.csv"
CHART_ARGS="$CHART_ARGS --branch-name ${BRANCH_NAME}"
# shellcheck disable=SC2086
uv run --with matplotlib python3 .github/scripts/bench-reth-charts.py $CHART_ARGS
- name: Upload results
if: success()
uses: actions/upload-artifact@v4
with:
name: bench-reth-results
path: |
/tmp/bench-results-baseline/
/tmp/bench-results-branch/
/tmp/bench-summary.json
/tmp/bench-charts/
- name: Push charts
id: push-charts
if: success()
run: |
PR_NUMBER=${{ github.event.issue.number }}
RUN_ID=${{ github.run_id }}
CHART_DIR="pr/${PR_NUMBER}/${RUN_ID}"
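# Charts go to the orphan bench-charts branch so the PR comment can embed them via raw.githubusercontent.com.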
if git fetch origin bench-charts 2>/dev/null; then
git checkout bench-charts
else
git checkout --orphan bench-charts
git rm -rf . 2>/dev/null || true
fi
mkdir -p "${CHART_DIR}"
cp /tmp/bench-charts/*.png "${CHART_DIR}/"
git add "${CHART_DIR}"
git -c user.name="github-actions" -c user.email="github-actions@github.com" \
commit -m "bench charts for PR #${PR_NUMBER} run ${RUN_ID}"
git push origin bench-charts
echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
- name: Compare & comment
if: success()
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');
let comment = '';
try {
comment = fs.readFileSync('/tmp/bench-comment.md', 'utf8');
} catch (e) {
comment = '⚠️ Engine benchmark completed but failed to generate comparison.';
}
const sha = '${{ steps.push-charts.outputs.sha }}';
const prNumber = context.issue.number;
const runId = '${{ github.run_id }}';
const baseUrl = `https://raw.githubusercontent.com/${context.repo.owner}/${context.repo.repo}/${sha}/pr/${prNumber}/${runId}`;
const charts = [
{ file: 'latency_throughput.png', label: 'Latency, Throughput & Diff' },
{ file: 'wait_breakdown.png', label: 'Wait Time Breakdown' },
{ file: 'gas_vs_latency.png', label: 'Gas vs Latency' },
];
let chartMarkdown = '\n\n### Charts\n\n';
for (const chart of charts) {
chartMarkdown += `<details><summary>${chart.label}</summary>\n\n`;
chartMarkdown += `![${chart.label}](${baseUrl}/${chart.file})\n\n`;
chartMarkdown += `</details>\n\n`;
}
comment += chartMarkdown;
const requestedBy = '${{ github.event.comment.user.login }}';
const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
const body = `cc @${requestedBy}\n\n✅ Benchmark complete! [View run](${runUrl})\n\n${comment}`;
const ackCommentId = '${{ steps.ack.outputs.comment-id }}';
if (ackCommentId) {
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: parseInt(ackCommentId),
body,
});
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body,
});
}
- name: Upload node log
if: failure()
uses: actions/upload-artifact@v4
with:
name: reth-node-log
path: |
/tmp/reth-bench-node-baseline.log
/tmp/reth-bench-node-branch.log
- name: Restore system settings
if: always()
run: |
sudo systemctl start irqbalance cron atd 2>/dev/null || true