Mirror of https://github.com/paradigmxyz/reth.git (synced 2026-02-19 03:04:27 -05:00)
ci: reth-bench (#22134)
.github/scripts/bench-reth-build.sh (vendored, executable file, 66 lines)
@@ -0,0 +1,66 @@
#!/usr/bin/env bash
#
# Builds (or fetches from cache) reth binaries for benchmarking.
#
# Usage: bench-reth-build.sh <main|branch> <commit> [branch-sha]
#
#   main   — build/fetch the baseline binary at <commit> (merge-base)
#   branch — build/fetch the candidate binary + reth-bench at <commit>
#            optional branch-sha is the PR head commit for cache key
#
# Outputs:
#   main:   target/profiling-baseline/reth
#   branch: target/profiling/reth, reth-bench installed to cargo bin
#
# Required: mc (MinIO client) configured at /home/ubuntu/.mc
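#
# Example invocations (commit SHAs are illustrative):
#   bench-reth-build.sh main   1a2b3c4           # baseline at the merge-base commit
#   bench-reth-build.sh branch 9f8e7d6 5b4a3c2    # candidate; cache keyed on the PR head SHA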
set -euo pipefail

MC="mc --config-dir /home/ubuntu/.mc"
MODE="$1"
COMMIT="$2"

case "$MODE" in
  main)
    BUCKET="minio/reth-binaries/${COMMIT}"
    mkdir -p target/profiling-baseline

    if $MC stat "${BUCKET}/reth" &>/dev/null; then
      echo "Cache hit for main (${COMMIT}), downloading binary..."
      $MC cp "${BUCKET}/reth" target/profiling-baseline/reth
      chmod +x target/profiling-baseline/reth
    else
      echo "Cache miss for main (${COMMIT}), building from source..."
      CURRENT_REF=$(git rev-parse HEAD)
      git checkout "${COMMIT}"
      cargo build --profile profiling --bin reth
      cp target/profiling/reth target/profiling-baseline/reth
      $MC cp target/profiling-baseline/reth "${BUCKET}/reth"
      git checkout "${CURRENT_REF}"
    fi
    ;;

  branch)
    BRANCH_SHA="${3:-$COMMIT}"
    BUCKET="minio/reth-binaries/${BRANCH_SHA}"

    if $MC stat "${BUCKET}/reth" &>/dev/null && $MC stat "${BUCKET}/reth-bench" &>/dev/null; then
      echo "Cache hit for ${BRANCH_SHA}, downloading binaries..."
      mkdir -p target/profiling
      $MC cp "${BUCKET}/reth" target/profiling/reth
      $MC cp "${BUCKET}/reth-bench" /home/ubuntu/.cargo/bin/reth-bench
      chmod +x target/profiling/reth /home/ubuntu/.cargo/bin/reth-bench
    else
      echo "Cache miss for ${BRANCH_SHA}, building from source..."
      rustup show active-toolchain || rustup default stable
      make profiling
      make install-reth-bench
      $MC cp target/profiling/reth "${BUCKET}/reth"
      $MC cp "$(which reth-bench)" "${BUCKET}/reth-bench"
    fi
    ;;

  *)
    echo "Usage: $0 <main|branch> <commit> [branch-sha]"
    exit 1
    ;;
esac
.github/scripts/bench-reth-charts.py (vendored, normal file, 232 lines)
@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""Generate benchmark charts from reth-bench CSV output.

Usage:
    bench-reth-charts.py <combined_csv> --output-dir <dir> [--baseline <baseline_csv>]
        [--baseline-name <label>] [--branch-name <label>]

Generates three PNG charts:
  1. newPayload latency + Ggas/s per block (+ latency diff when baseline present)
  2. Wait breakdown (persistence, execution cache, sparse trie) per block
  3. Scatter plot of gas used vs latency

When --baseline is provided, charts overlay both datasets for comparison.
"""

import argparse
import csv
import sys
from pathlib import Path

import numpy as np

try:
    import matplotlib

    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
except ImportError:
    print("matplotlib is required: pip install matplotlib", file=sys.stderr)
    sys.exit(1)

GIGAGAS = 1_000_000_000


def parse_combined_csv(path: str) -> list[dict]:
    rows = []
    with open(path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            rows.append(
                {
                    "block_number": int(row["block_number"]),
                    "gas_used": int(row["gas_used"]),
                    "new_payload_latency_us": int(row["new_payload_latency"]),
                    "persistence_wait_us": int(row["persistence_wait"])
                    if row.get("persistence_wait")
                    else None,
                    "execution_cache_wait_us": int(row.get("execution_cache_wait", 0)),
                    "sparse_trie_wait_us": int(row.get("sparse_trie_wait", 0)),
                }
            )
    return rows
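
# The columns read above are assumed to come from reth-bench's combined_latency.csv
# (latency/wait values in microseconds). A hypothetical row, showing only the
# columns this parser uses:
#   block_number,gas_used,new_payload_latency,persistence_wait,execution_cache_wait,sparse_trie_wait
#   18000001,14230000,185000,1200,300,4500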


def plot_latency_and_throughput(
    feature: list[dict], baseline: list[dict] | None, out: Path,
    baseline_name: str = "main", branch_name: str = "branch",
):
    num_plots = 3 if baseline else 2
    fig, axes = plt.subplots(num_plots, 1, figsize=(12, 4 * num_plots), sharex=True)
    ax1, ax2 = axes[0], axes[1]

    feat_x = [r["block_number"] for r in feature]
    feat_lat = [r["new_payload_latency_us"] / 1_000 for r in feature]
    feat_ggas = []
    for r in feature:
        lat_s = r["new_payload_latency_us"] / 1_000_000
        feat_ggas.append(r["gas_used"] / lat_s / GIGAGAS if lat_s > 0 else 0)

    if baseline:
        base_x = [r["block_number"] for r in baseline]
        base_lat = [r["new_payload_latency_us"] / 1_000 for r in baseline]
        base_ggas = []
        for r in baseline:
            lat_s = r["new_payload_latency_us"] / 1_000_000
            base_ggas.append(r["gas_used"] / lat_s / GIGAGAS if lat_s > 0 else 0)
        ax1.plot(base_x, base_lat, linewidth=0.8, label=baseline_name, alpha=0.7)
        ax2.plot(base_x, base_ggas, linewidth=0.8, label=baseline_name, alpha=0.7)

    ax1.plot(feat_x, feat_lat, linewidth=0.8, label=branch_name)
    ax1.set_ylabel("Latency (ms)")
    ax1.set_title("newPayload Latency per Block")
    ax1.grid(True, alpha=0.3)
    if baseline:
        ax1.legend()

    ax2.plot(feat_x, feat_ggas, linewidth=0.8, label=branch_name)
    ax2.set_ylabel("Ggas/s")
    ax2.set_title("Execution Throughput per Block")
    ax2.grid(True, alpha=0.3)
    if baseline:
        ax2.legend()

    if baseline:
        ax3 = axes[2]
        base_by_block = {r["block_number"]: r["new_payload_latency_us"] for r in baseline}
        blocks, diffs = [], []
        for r in feature:
            bn = r["block_number"]
            if bn in base_by_block and base_by_block[bn] > 0:
                pct = (r["new_payload_latency_us"] - base_by_block[bn]) / base_by_block[bn] * 100
                blocks.append(bn)
                diffs.append(pct)
        if blocks:
            colors = ["green" if d <= 0 else "red" for d in diffs]
            ax3.bar(blocks, diffs, width=1.0, color=colors, alpha=0.7, edgecolor="none")
        ax3.axhline(0, color="black", linewidth=0.5)
        ax3.set_ylabel("Δ Latency (%)")
        ax3.set_title("Per-Block newPayload Latency Change (branch vs main)")
        ax3.grid(True, alpha=0.3, axis="y")

    axes[-1].set_xlabel("Block Number")
    fig.tight_layout()
    fig.savefig(out, dpi=150)
    plt.close(fig)


def plot_wait_breakdown(
    feature: list[dict], baseline: list[dict] | None, out: Path,
    baseline_name: str = "main", branch_name: str = "branch",
):
    series = [
        ("Persistence Wait", "persistence_wait_us"),
        ("State Cache Wait", "execution_cache_wait_us"),
        ("Trie Cache Wait", "sparse_trie_wait_us"),
    ]

    fig, axes = plt.subplots(len(series), 1, figsize=(12, 3 * len(series)), sharex=True)
    for ax, (label, key) in zip(axes, series):
        if baseline:
            bx = [r["block_number"] for r in baseline if r[key] is not None]
            by = [r[key] / 1_000 for r in baseline if r[key] is not None]
            if bx:
                ax.plot(bx, by, linewidth=0.8, label=baseline_name, alpha=0.7)

        fx = [r["block_number"] for r in feature if r[key] is not None]
        fy = [r[key] / 1_000 for r in feature if r[key] is not None]
        if fx:
            ax.plot(fx, fy, linewidth=0.8, label=branch_name)

        ax.set_ylabel("ms")
        ax.set_title(label)
        ax.grid(True, alpha=0.3)
        if baseline:
            ax.legend()

    axes[-1].set_xlabel("Block Number")
    fig.suptitle("Wait Time Breakdown per Block", fontsize=14, y=1.01)
    fig.tight_layout()
    fig.savefig(out, dpi=150, bbox_inches="tight")
    plt.close(fig)


def _add_regression(ax, x, y, color, label):
    """Add a linear regression line to the axes."""
    if len(x) < 2:
        return
    xa, ya = np.array(x), np.array(y)
    m, b = np.polyfit(xa, ya, 1)
    x_range = np.linspace(xa.min(), xa.max(), 100)
    ax.plot(x_range, m * x_range + b, color=color, linewidth=1.5, alpha=0.8,
            label=label)


def plot_gas_vs_latency(
    feature: list[dict], baseline: list[dict] | None, out: Path,
    baseline_name: str = "main", branch_name: str = "branch",
):
    fig, ax = plt.subplots(figsize=(8, 6))

    if baseline:
        bgas = [r["gas_used"] / 1_000_000 for r in baseline]
        blat = [r["new_payload_latency_us"] / 1_000 for r in baseline]
        ax.scatter(bgas, blat, s=8, alpha=0.5)
        _add_regression(ax, bgas, blat, "tab:blue", baseline_name)

    fgas = [r["gas_used"] / 1_000_000 for r in feature]
    flat = [r["new_payload_latency_us"] / 1_000 for r in feature]
    ax.scatter(fgas, flat, s=8, alpha=0.6)
    _add_regression(ax, fgas, flat, "tab:orange", branch_name)

    ax.set_xlabel("Gas Used (Mgas)")
    ax.set_ylabel("newPayload Latency (ms)")
    ax.set_title("Gas Used vs Latency")
    ax.grid(True, alpha=0.3)
    ax.legend()
    fig.tight_layout()
    fig.savefig(out, dpi=150)
    plt.close(fig)


def main():
    parser = argparse.ArgumentParser(description="Generate benchmark charts")
    parser.add_argument("combined_csv", help="Path to combined_latency.csv (feature)")
    parser.add_argument(
        "--output-dir", required=True, help="Output directory for PNG charts"
    )
    parser.add_argument(
        "--baseline", help="Path to baseline (main) combined_latency.csv"
    )
    parser.add_argument("--baseline-name", default="main", help="Label for baseline")
    parser.add_argument("--branch-name", default="branch", help="Label for branch")
    args = parser.parse_args()

    feature = parse_combined_csv(args.combined_csv)
    if not feature:
        print("No results found in combined CSV", file=sys.stderr)
        sys.exit(1)

    baseline = None
    if args.baseline:
        baseline = parse_combined_csv(args.baseline)
        if not baseline:
            print(
                "Warning: no results in baseline CSV, skipping comparison",
                file=sys.stderr,
            )
            baseline = None

    out_dir = Path(args.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    bname = args.baseline_name
    fname = args.branch_name
    plot_latency_and_throughput(feature, baseline, out_dir / "latency_throughput.png", bname, fname)
    plot_wait_breakdown(feature, baseline, out_dir / "wait_breakdown.png", bname, fname)
    plot_gas_vs_latency(feature, baseline, out_dir / "gas_vs_latency.png", bname, fname)

    print(f"Charts written to {out_dir}")


if __name__ == "__main__":
    main()
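
# Example (mirrors the workflow's "Generate charts" step; CSV paths and the branch
# label are illustrative):
#   uv run --with matplotlib python3 .github/scripts/bench-reth-charts.py \
#     /tmp/bench-results-branch/combined_latency.csv --output-dir /tmp/bench-charts \
#     --baseline /tmp/bench-results-baseline/combined_latency.csv --branch-name my-branch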
.github/scripts/bench-reth-run.sh (vendored, executable file, 94 lines)
@@ -0,0 +1,94 @@
#!/usr/bin/env bash
#
# Runs a single reth-bench cycle: mount snapshot → start node → warmup →
# benchmark → stop node → recover snapshot.
#
# Usage: bench-reth-run.sh <label> <binary> <output-dir>
#
# Required env: SCHELK_MOUNT, BENCH_RPC_URL, BENCH_BLOCKS, BENCH_WARMUP_BLOCKS
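#
# Example (env values mirror how the workflow invokes this script; paths are illustrative):
#   SCHELK_MOUNT=/reth-bench BENCH_RPC_URL=https://ethereum.reth.rs/rpc \
#   BENCH_BLOCKS=500 BENCH_WARMUP_BLOCKS=100 \
#     bench-reth-run.sh baseline target/profiling-baseline/reth /tmp/bench-results-baseline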
set -euo pipefail

LABEL="$1"
BINARY="$2"
OUTPUT_DIR="$3"
DATADIR="$SCHELK_MOUNT/datadir"
LOG="/tmp/reth-bench-node-${LABEL}.log"

cleanup() {
  kill "$TAIL_PID" 2>/dev/null || true
  if [ -n "${RETH_PID:-}" ] && sudo kill -0 "$RETH_PID" 2>/dev/null; then
    sudo kill "$RETH_PID"
    for i in $(seq 1 30); do
      sudo kill -0 "$RETH_PID" 2>/dev/null || break
      sleep 1
    done
    sudo kill -9 "$RETH_PID" 2>/dev/null || true
  fi
  mountpoint -q "$SCHELK_MOUNT" && sudo schelk recover -y || true
}
TAIL_PID=
trap cleanup EXIT

# Mount
sudo schelk mount -y
sync
sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'
echo "=== Cache state after drop ==="
free -h
grep Cached /proc/meminfo

# Start reth
# CPU layout: core 0 = OS/IRQs/reth-bench/aux, cores 1+ = reth node
RETH_BENCH="$(which reth-bench)"
ONLINE=$(nproc --all)
RETH_CPUS="1-$(( ONLINE - 1 ))"
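# e.g. on an 8-core runner: ONLINE=8 -> RETH_CPUS="1-7"; core 0 is left for the OS,
# IRQs and reth-bench (the workflow pins this whole script to core 0 with taskset).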
sudo taskset -c "$RETH_CPUS" nice -n -20 "$BINARY" node \
  --datadir "$DATADIR" \
  --engine.accept-execution-requests-hash \
  --http \
  --http.port 8545 \
  --ws \
  --ws.api all \
  --authrpc.port 8551 \
  --disable-discovery \
  --no-persist-peers \
  > "$LOG" 2>&1 &

RETH_PID=$!
stdbuf -oL tail -f "$LOG" | sed -u "s/^/[reth] /" &
TAIL_PID=$!

for i in $(seq 1 60); do
  if curl -sf http://127.0.0.1:8545 -X POST \
    -H 'Content-Type: application/json' \
    -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' \
    > /dev/null 2>&1; then
    echo "reth (${LABEL}) is ready after ${i}s"
    break
  fi
  if [ "$i" -eq 60 ]; then
    echo "::error::reth (${LABEL}) failed to start within 60s"
    cat "$LOG"
    exit 1
  fi
  sleep 1
done

# Warmup
sudo nice -n -20 "$RETH_BENCH" new-payload-fcu \
  --rpc-url "$BENCH_RPC_URL" \
  --engine-rpc-url http://127.0.0.1:8551 \
  --jwt-secret "$DATADIR/jwt.hex" \
  --advance "${BENCH_WARMUP_BLOCKS:-50}" \
  --reth-new-payload 2>&1 | sed -u "s/^/[bench] /"

# Benchmark
sudo nice -n -20 "$RETH_BENCH" new-payload-fcu \
  --rpc-url "$BENCH_RPC_URL" \
  --engine-rpc-url http://127.0.0.1:8551 \
  --jwt-secret "$DATADIR/jwt.hex" \
  --advance "$BENCH_BLOCKS" \
  --reth-new-payload \
  --output "$OUTPUT_DIR" 2>&1 | sed -u "s/^/[bench] /"

# cleanup runs via trap
.github/scripts/bench-reth-summary.py (vendored, executable file, 415 lines)
@@ -0,0 +1,415 @@
#!/usr/bin/env python3
"""Parse reth-bench CSV output and generate a summary JSON + markdown comparison.

Usage:
    bench-reth-summary.py \
        --baseline-csv <baseline_combined.csv> [...] \
        --branch-csv <branch_combined.csv> [...] \
        --gas-csv <total_gas.csv> \
        --output-summary <summary.json> \
        --output-markdown <comment.md> \
        [--repo <owner/repo>] \
        [--baseline-ref <sha>] \
        [--branch-name <name>] \
        [--branch-sha <sha>] \
        [--behind-main <n>]

Generates a paired statistical comparison between baseline (main) and branch.
Matches blocks by number and computes per-block diffs to cancel out gas
variance. Fails if baseline or branch CSV is missing or empty.
"""

import argparse
import csv
import json
import math
import random
import sys

GIGAGAS = 1_000_000_000
T_CRITICAL = 1.96  # two-tailed 95% confidence
BOOTSTRAP_ITERATIONS = 10_000


def parse_combined_csv(path: str) -> list[dict]:
    """Parse combined_latency.csv into a list of per-block dicts."""
    rows = []
    with open(path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            rows.append(
                {
                    "block_number": int(row["block_number"]),
                    "gas_used": int(row["gas_used"]),
                    "gas_limit": int(row["gas_limit"]),
                    "transaction_count": int(row["transaction_count"]),
                    "new_payload_latency_us": int(row["new_payload_latency"]),
                    "fcu_latency_us": int(row["fcu_latency"]),
                    "total_latency_us": int(row["total_latency"]),
                    "persistence_wait_us": int(row["persistence_wait"])
                    if row.get("persistence_wait")
                    else None,
                    "execution_cache_wait_us": int(row.get("execution_cache_wait", 0)),
                    "sparse_trie_wait_us": int(row.get("sparse_trie_wait", 0)),
                }
            )
    return rows


def parse_gas_csv(path: str) -> list[dict]:
    """Parse total_gas.csv into a list of per-block dicts."""
    rows = []
    with open(path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            rows.append(
                {
                    "block_number": int(row["block_number"]),
                    "gas_used": int(row["gas_used"]),
                    "time_us": int(row["time"]),
                }
            )
    return rows
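
# total_gas.csv is assumed to carry block_number, gas_used and a per-block time
# column in microseconds. A hypothetical row for illustration:
#   block_number,gas_used,time
#   18000001,14230000,185000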


def stddev(values: list[float], mean: float) -> float:
    if len(values) < 2:
        return 0.0
    return math.sqrt(sum((v - mean) ** 2 for v in values) / (len(values) - 1))


def percentile(sorted_vals: list[float], pct: int) -> float:
    if not sorted_vals:
        return 0.0
    idx = int(len(sorted_vals) * pct / 100)
    idx = min(idx, len(sorted_vals) - 1)
    return sorted_vals[idx]
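
# percentile() is a simple nearest-rank lookup on an already-sorted list, e.g. for
# [10, 20, 30, 40]: pct=50 -> idx 2 -> 30, pct=99 -> idx 3 -> 40.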


def compute_stats(combined: list[dict]) -> dict:
    """Compute per-run statistics from parsed CSV data."""
    n = len(combined)
    if n == 0:
        return {}

    latencies_ms = [r["new_payload_latency_us"] / 1_000 for r in combined]
    sorted_lat = sorted(latencies_ms)
    mean_lat = sum(latencies_ms) / n
    std_lat = stddev(latencies_ms, mean_lat)

    mgas_s_values = []
    for r in combined:
        lat_s = r["new_payload_latency_us"] / 1_000_000
        if lat_s > 0:
            mgas_s_values.append(r["gas_used"] / lat_s / 1_000_000)
    mean_mgas_s = sum(mgas_s_values) / len(mgas_s_values) if mgas_s_values else 0

    return {
        "n": n,
        "mean_ms": mean_lat,
        "stddev_ms": std_lat,
        "p50_ms": percentile(sorted_lat, 50),
        "p90_ms": percentile(sorted_lat, 90),
        "p99_ms": percentile(sorted_lat, 99),
        "mean_mgas_s": mean_mgas_s,
    }


def _paired_data(
    baseline: list[dict], branch: list[dict]
) -> tuple[list[tuple[float, float]], list[float], list[float]]:
    """Match blocks and return paired latencies and per-block diffs.

    Returns:
        pairs: list of (baseline_ms, branch_ms) tuples
        lat_diffs_ms: list of branch − baseline latency diffs in ms
        mgas_diffs: list of branch − baseline Mgas/s diffs
    """
    baseline_by_block = {r["block_number"]: r for r in baseline}
    branch_by_block = {r["block_number"]: r for r in branch}
    common_blocks = sorted(set(baseline_by_block) & set(branch_by_block))

    pairs = []
    lat_diffs_ms = []
    mgas_diffs = []
    for bn in common_blocks:
        b = baseline_by_block[bn]
        f = branch_by_block[bn]
        b_ms = b["new_payload_latency_us"] / 1_000
        f_ms = f["new_payload_latency_us"] / 1_000
        pairs.append((b_ms, f_ms))
        lat_diffs_ms.append(f_ms - b_ms)
        b_lat_s = b["new_payload_latency_us"] / 1_000_000
        f_lat_s = f["new_payload_latency_us"] / 1_000_000
        if b_lat_s > 0 and f_lat_s > 0:
            mgas_diffs.append(
                f["gas_used"] / f_lat_s / 1_000_000
                - b["gas_used"] / b_lat_s / 1_000_000
            )
    return pairs, lat_diffs_ms, mgas_diffs


def compute_paired_stats(
    baseline_runs: list[list[dict]],
    branch_runs: list[list[dict]],
) -> dict:
    """Compute paired statistics between baseline and branch runs.

    Each pair (baseline_runs[i], branch_runs[i]) produces per-block diffs.
    All diffs are pooled for the final CI.
    """
    all_pairs = []
    all_lat_diffs = []
    all_mgas_diffs = []
    for baseline, branch in zip(baseline_runs, branch_runs):
        pairs, lat_diffs, mgas_diffs = _paired_data(baseline, branch)
        all_pairs.extend(pairs)
        all_lat_diffs.extend(lat_diffs)
        all_mgas_diffs.extend(mgas_diffs)

    if not all_lat_diffs:
        return {}

    n = len(all_lat_diffs)
    mean_diff = sum(all_lat_diffs) / n
    std_diff = stddev(all_lat_diffs, mean_diff)
    se = std_diff / math.sqrt(n) if n > 0 else 0.0
    ci = T_CRITICAL * se

    # Bootstrap CI on difference-of-percentiles (resample paired blocks)
    base_lats = sorted([p[0] for p in all_pairs])
    branch_lats = sorted([p[1] for p in all_pairs])
    p50_diff = percentile(branch_lats, 50) - percentile(base_lats, 50)
    p90_diff = percentile(branch_lats, 90) - percentile(base_lats, 90)
    p99_diff = percentile(branch_lats, 99) - percentile(base_lats, 99)

    rng = random.Random(42)
    p50_boot, p90_boot, p99_boot = [], [], []
    for _ in range(BOOTSTRAP_ITERATIONS):
        sample = rng.choices(all_pairs, k=n)
        b_sorted = sorted(p[0] for p in sample)
        f_sorted = sorted(p[1] for p in sample)
        p50_boot.append(percentile(f_sorted, 50) - percentile(b_sorted, 50))
        p90_boot.append(percentile(f_sorted, 90) - percentile(b_sorted, 90))
        p99_boot.append(percentile(f_sorted, 99) - percentile(b_sorted, 99))
    p50_boot.sort()
    p90_boot.sort()
    p99_boot.sort()
    lo = int(BOOTSTRAP_ITERATIONS * 0.025)
    hi = int(BOOTSTRAP_ITERATIONS * 0.975)

    mean_mgas_diff = sum(all_mgas_diffs) / len(all_mgas_diffs) if all_mgas_diffs else 0.0
    std_mgas_diff = stddev(all_mgas_diffs, mean_mgas_diff) if len(all_mgas_diffs) > 1 else 0.0
    mgas_se = std_mgas_diff / math.sqrt(len(all_mgas_diffs)) if all_mgas_diffs else 0.0
    mgas_ci = T_CRITICAL * mgas_se

    return {
        "n": n,
        "mean_diff_ms": mean_diff,
        "ci_ms": ci,
        "p50_diff_ms": p50_diff,
        "p50_ci_ms": (p50_boot[hi] - p50_boot[lo]) / 2,
        "p90_diff_ms": p90_diff,
        "p90_ci_ms": (p90_boot[hi] - p90_boot[lo]) / 2,
        "p99_diff_ms": p99_diff,
        "p99_ci_ms": (p99_boot[hi] - p99_boot[lo]) / 2,
        "mean_mgas_diff": mean_mgas_diff,
        "mgas_ci": mgas_ci,
    }
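
# Note on the returned intervals: ci_ms and mgas_ci are 1.96 * the standard error
# of the pooled per-block diffs, while the p50/p90/p99 *_ci_ms values are half the
# width of the bootstrap 2.5th-97.5th percentile interval, so each can be read as
# a ± bound around the corresponding diff.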


def compute_summary(combined: list[dict], gas: list[dict]) -> dict:
    """Compute aggregate metrics from parsed CSV data."""
    blocks = len(combined)
    return {
        "blocks": blocks,
    }


def format_duration(seconds: float) -> str:
    if seconds >= 60:
        return f"{seconds / 60:.1f}min"
    return f"{seconds}s"


def format_gas(gas: int) -> str:
    if gas >= GIGAGAS:
        return f"{gas / GIGAGAS:.1f}G"
    if gas >= 1_000_000:
        return f"{gas / 1_000_000:.1f}M"
    return f"{gas:,}"


def fmt_ms(v: float) -> str:
    return f"{v:.2f}ms"


def fmt_mgas(v: float) -> str:
    return f"{v:.2f}"


def change_str(pct: float, ci_pct: float, lower_is_better: bool) -> str:
    """Format change% with paired CI significance.

    Significant if the CI doesn't cross zero (i.e. |pct| > ci_pct).
    """
    significant = abs(pct) > ci_pct
    if not significant:
        emoji = "⚪"
    elif (pct < 0) == lower_is_better:
        emoji = "✅"
    else:
        emoji = "❌"

    return f"{pct:+.2f}% {emoji} (±{ci_pct:.2f}%)"
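
# Illustrative outputs (values made up):
#   change_str(-1.50, 0.80, lower_is_better=True)  -> "-1.50% ✅ (±0.80%)"
#   change_str(+3.20, 1.10, lower_is_better=True)  -> "+3.20% ❌ (±1.10%)"
#   change_str(-0.40, 1.00, lower_is_better=False) -> "-0.40% ⚪ (±1.00%)"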


def generate_comparison_table(
    run1: dict,
    run2: dict,
    paired: dict,
    repo: str,
    baseline_ref: str,
    branch_name: str,
    branch_sha: str,
) -> str:
    """Generate a markdown comparison table between baseline (main) and branch."""
    n = paired["n"]

    def pct(base: float, feat: float) -> float:
        return (feat - base) / base * 100.0 if base > 0 else 0.0

    mean_pct = pct(run1["mean_ms"], run2["mean_ms"])
    gas_pct = pct(run1["mean_mgas_s"], run2["mean_mgas_s"])

    p50_pct = pct(run1["p50_ms"], run2["p50_ms"])
    p90_pct = pct(run1["p90_ms"], run2["p90_ms"])
    p99_pct = pct(run1["p99_ms"], run2["p99_ms"])

    # Bootstrap CIs as % of baseline percentile
    p50_ci_pct = paired["p50_ci_ms"] / run1["p50_ms"] * 100.0 if run1["p50_ms"] > 0 else 0.0
    p90_ci_pct = paired["p90_ci_ms"] / run1["p90_ms"] * 100.0 if run1["p90_ms"] > 0 else 0.0
    p99_ci_pct = paired["p99_ci_ms"] / run1["p99_ms"] * 100.0 if run1["p99_ms"] > 0 else 0.0

    # CI as a percentage of baseline mean
    lat_ci_pct = paired["ci_ms"] / run1["mean_ms"] * 100.0 if run1["mean_ms"] > 0 else 0.0
    mgas_ci_pct = paired["mgas_ci"] / run1["mean_mgas_s"] * 100.0 if run1["mean_mgas_s"] > 0 else 0.0

    base_url = f"https://github.com/{repo}/commit"
    baseline_label = f"[`main`]({base_url}/{baseline_ref})"
    branch_label = f"[`{branch_name}`]({base_url}/{branch_sha})"

    lines = [
        f"| Metric | {baseline_label} | {branch_label} | Change |",
        "|--------|------|--------|--------|",
        f"| Mean | {fmt_ms(run1['mean_ms'])} | {fmt_ms(run2['mean_ms'])} | {change_str(mean_pct, lat_ci_pct, lower_is_better=True)} |",
        f"| StdDev | {fmt_ms(run1['stddev_ms'])} | {fmt_ms(run2['stddev_ms'])} | |",
        f"| P50 | {fmt_ms(run1['p50_ms'])} | {fmt_ms(run2['p50_ms'])} | {change_str(p50_pct, p50_ci_pct, lower_is_better=True)} |",
        f"| P90 | {fmt_ms(run1['p90_ms'])} | {fmt_ms(run2['p90_ms'])} | {change_str(p90_pct, p90_ci_pct, lower_is_better=True)} |",
        f"| P99 | {fmt_ms(run1['p99_ms'])} | {fmt_ms(run2['p99_ms'])} | {change_str(p99_pct, p99_ci_pct, lower_is_better=True)} |",
        f"| Mgas/s | {fmt_mgas(run1['mean_mgas_s'])} | {fmt_mgas(run2['mean_mgas_s'])} | {change_str(gas_pct, mgas_ci_pct, lower_is_better=False)} |",
        "",
        f"*{n} blocks*",
    ]
    return "\n".join(lines)


def generate_markdown(
    summary: dict, comparison_table: str,
    behind_main: int = 0, repo: str = "", baseline_ref: str = "",
) -> str:
    """Generate a markdown comment body."""
    lines = ["## Benchmark Results", "", comparison_table]
    if behind_main > 0:
        s = "s" if behind_main > 1 else ""
        diff_link = f"https://github.com/{repo}/compare/{baseline_ref[:12]}...main"
        lines.append("")
        lines.append(f"> ⚠️ Branch is [**{behind_main} commit{s} behind `main`**]({diff_link}). Consider rebasing for accurate results.")
    return "\n".join(lines)


def main():
    parser = argparse.ArgumentParser(description="Parse reth-bench ABBA results")
    parser.add_argument(
        "--baseline-csv", nargs="+", required=True,
        help="Baseline combined_latency.csv files (A1, A2)",
    )
    parser.add_argument(
        "--branch-csv", nargs="+", required=True,
        help="Branch combined_latency.csv files (B1, B2)",
    )
    parser.add_argument("--gas-csv", required=True, help="Path to total_gas.csv")
    parser.add_argument(
        "--output-summary", required=True, help="Output JSON summary path"
    )
    parser.add_argument("--output-markdown", required=True, help="Output markdown path")
    parser.add_argument(
        "--repo", default="paradigmxyz/reth", help="GitHub repo (owner/name)"
    )
    parser.add_argument("--baseline-ref", default=None, help="Baseline commit SHA")
    parser.add_argument("--branch-name", default=None, help="Branch name")
    parser.add_argument("--branch-sha", default=None, help="Branch commit SHA")
    parser.add_argument("--behind-main", type=int, default=0, help="Commits behind main")
    args = parser.parse_args()

    if len(args.baseline_csv) != len(args.branch_csv):
        print("Must provide equal number of baseline and branch CSVs", file=sys.stderr)
        sys.exit(1)

    baseline_runs = []
    branch_runs = []
    for path in args.baseline_csv:
        data = parse_combined_csv(path)
        if not data:
            print(f"No results in {path}", file=sys.stderr)
            sys.exit(1)
        baseline_runs.append(data)
    for path in args.branch_csv:
        data = parse_combined_csv(path)
        if not data:
            print(f"No results in {path}", file=sys.stderr)
            sys.exit(1)
        branch_runs.append(data)

    gas = parse_gas_csv(args.gas_csv)

    all_baseline = [r for run in baseline_runs for r in run]
    all_branch = [r for run in branch_runs for r in run]

    summary = compute_summary(all_branch, gas)
    with open(args.output_summary, "w") as f:
        json.dump(summary, f, indent=2)
    print(f"Summary written to {args.output_summary}")

    baseline_stats = compute_stats(all_baseline)
    branch_stats = compute_stats(all_branch)
    paired_stats = compute_paired_stats(baseline_runs, branch_runs)

    if not paired_stats:
        print("No common blocks between baseline and branch runs", file=sys.stderr)
        sys.exit(1)

    comparison_table = generate_comparison_table(
        baseline_stats,
        branch_stats,
        paired_stats,
        repo=args.repo,
        baseline_ref=args.baseline_ref or "main",
        branch_name=args.branch_name or "branch",
        branch_sha=args.branch_sha or "unknown",
    )
    print(f"Generated comparison ({paired_stats['n']} paired blocks, "
          f"mean diff {paired_stats['mean_diff_ms']:+.3f}ms ± {paired_stats['ci_ms']:.3f}ms)")

    markdown = generate_markdown(
        summary, comparison_table,
        behind_main=args.behind_main,
        repo=args.repo,
        baseline_ref=args.baseline_ref or "",
    )

    with open(args.output_markdown, "w") as f:
        f.write(markdown)
    print(f"Markdown written to {args.output_markdown}")


if __name__ == "__main__":
    main()
.github/workflows/bench.yml (vendored, 371 lines)
@@ -1,11 +1,25 @@
# Runs benchmarks.
#
# The reth-bench job replays real blocks via the Engine API against a reth node
# backed by a local snapshot managed with schelk.
#
# It runs the main (baseline) binary and the branch (candidate) binary on the
# same block range (snapshot recovered between runs) to compare performance.

on:
  pull_request:
  # TODO: Disabled temporarily for https://github.com/CodSpeedHQ/runner/issues/55
  # merge_group:
  push:
    branches: [main]
  issue_comment:
    types: [created, edited]
  workflow_dispatch:
    inputs:
      blocks:
        description: "Number of blocks to benchmark"
        required: false
        default: "50"
        type: string

env:
  CARGO_TERM_COLOR: always
@@ -14,9 +28,19 @@ env:
  RUSTC_WRAPPER: "sccache"

name: bench

permissions:
  contents: write
  pull-requests: write

concurrency:
  group: bench-${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  codspeed:
    runs-on: ${{ github.repository == 'paradigmxyz/reth' && 'depot-ubuntu-latest' || 'ubuntu-latest' }}
    if: github.event_name != 'issue_comment'
    runs-on: depot-ubuntu-latest
    strategy:
      matrix:
        partition: [1, 2]
@@ -31,6 +55,7 @@ jobs:
      - uses: actions/checkout@v6
        with:
          submodules: true
          ref: ${{ github.event_name == 'issue_comment' && format('refs/pull/{0}/merge', github.event.issue.number) || '' }}
      - uses: rui314/setup-mold@v1
      - uses: dtolnay/rust-toolchain@stable
      - uses: mozilla-actions/sccache-action@v0.0.9
@@ -49,3 +74,345 @@ jobs:
          run: cargo codspeed run ${{ matrix.crates }}
          mode: instrumentation
          token: ${{ secrets.CODSPEED_TOKEN }}

  reth-bench:
    if: github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, 'derek bench')
    name: reth-bench
    runs-on: [self-hosted, Linux, X64]
    timeout-minutes: 120
    env:
      BENCH_RPC_URL: https://ethereum.reth.rs/rpc
      SCHELK_MOUNT: /reth-bench
    steps:
      - name: Check org membership
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const user = context.payload.comment.user.login;
            try {
              const { status } = await github.rest.orgs.checkMembershipForUser({
                org: 'paradigmxyz',
                username: user,
              });
              if (status !== 204 && status !== 302) {
                core.setFailed(`@${user} is not a member of paradigmxyz`);
              }
            } catch (e) {
              core.setFailed(`@${user} is not a member of paradigmxyz`);
            }

      - name: Parse arguments
        id: args
        uses: actions/github-script@v7
        with:
          script: |
            const body = context.payload.comment.body.trim();
            const known = new Set(['blocks', 'warmup']);
            const defaults = { blocks: '500', warmup: '100' };
            const unknown = [];
            const invalid = [];
            const args = body.replace(/^derek bench\s*/, '');
            for (const part of args.split(/\s+/).filter(Boolean)) {
              const eq = part.indexOf('=');
              if (eq === -1) {
                unknown.push(part);
                continue;
              }
              const key = part.slice(0, eq);
              const value = part.slice(eq + 1);
              if (!known.has(key)) {
                unknown.push(key);
              } else if (!/^\d+$/.test(value)) {
                invalid.push(`\`${key}=${value}\` (must be a positive integer)`);
              } else {
                defaults[key] = value;
              }
            }
            const errors = [];
            if (unknown.length) errors.push(`Unknown argument(s): \`${unknown.join('`, `')}\``);
            if (invalid.length) errors.push(`Invalid value(s): ${invalid.join(', ')}`);
            if (errors.length) {
              const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`derek bench [blocks=N] [warmup=N]\``;
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: msg,
              });
              core.setFailed(msg);
              return;
            }
            core.setOutput('blocks', defaults.blocks);
            core.setOutput('warmup', defaults.warmup);
            core.exportVariable('BENCH_BLOCKS', defaults.blocks);
            core.exportVariable('BENCH_WARMUP_BLOCKS', defaults.warmup);
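
      # Example trigger comments (illustrative; defaults come from the parser above):
      #   "derek bench"                      -> blocks=500, warmup=100
      #   "derek bench blocks=300 warmup=50" -> override both values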

      - name: Acknowledge request
        id: ack
        uses: actions/github-script@v7
        with:
          script: |
            await github.rest.reactions.createForIssueComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: context.payload.comment.id,
              content: 'eyes',
            });

            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
            const blocks = '${{ steps.args.outputs.blocks }}';
            const warmup = '${{ steps.args.outputs.warmup }}';
            const { data: comment } = await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: `🚀 Benchmark started! [View run](${runUrl})\n\n⏳ **Status:** Building binaries...\n\n**Config:** ${blocks} blocks, ${warmup} warmup blocks`,
            });
            core.setOutput('comment-id', comment.id);

      - uses: actions/checkout@v6
        with:
          submodules: true
          fetch-depth: 0
          ref: ${{ format('refs/pull/{0}/merge', github.event.issue.number) }}

      - uses: dtolnay/rust-toolchain@stable
      - uses: mozilla-actions/sccache-action@v0.0.9
        continue-on-error: true

      # Verify all required tools are available
      - name: Check dependencies
        run: |
          export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
          missing=()
          for cmd in mc schelk cpupower taskset stdbuf python3 curl make uv; do
            command -v "$cmd" &>/dev/null || missing+=("$cmd")
          done
          if [ ${#missing[@]} -gt 0 ]; then
            echo "::error::Missing required tools: ${missing[*]}"
            exit 1
          fi
          echo "All dependencies found"

      # Build binaries
      - name: Fetch or build main binaries
        run: |
          MERGE_BASE=$(git merge-base HEAD origin/main 2>/dev/null || echo "${{ github.sha }}")
          .github/scripts/bench-reth-build.sh main "$MERGE_BASE"
      - name: Fetch or build branch binaries
        run: |
          BRANCH_SHA="${{ github.sha }}"
          .github/scripts/bench-reth-build.sh branch "$BRANCH_SHA"

      # System tuning for reproducible benchmarks
      - name: System setup
        run: |
          sudo cpupower frequency-set -g performance || true
          # Disable turbo boost (Intel and AMD paths)
          echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo 2>/dev/null || true
          echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost 2>/dev/null || true
          sudo swapoff -a || true
          echo 0 | sudo tee /proc/sys/kernel/randomize_va_space || true
          # Disable SMT (hyperthreading)
          for cpu in /sys/devices/system/cpu/cpu*/topology/thread_siblings_list; do
            first=$(cut -d, -f1 < "$cpu" | cut -d- -f1)
            current=$(echo "$cpu" | grep -o 'cpu[0-9]*' | grep -o '[0-9]*')
            if [ "$current" != "$first" ]; then
              echo 0 | sudo tee "/sys/devices/system/cpu/cpu${current}/online" || true
            fi
          done
          echo "Online CPUs: $(nproc)"
          # Disable transparent huge pages (compaction causes latency spikes)
          for p in /sys/kernel/mm/transparent_hugepage /sys/kernel/mm/transparent_hugepages; do
            [ -d "$p" ] && echo never | sudo tee "$p/enabled" && echo never | sudo tee "$p/defrag" && break
          done || true
          # Prevent deep C-states (avoids wake-up latency jitter)
          sudo sh -c 'exec 3<>/dev/cpu_dma_latency; echo -ne "\x00\x00\x00\x00" >&3; sleep infinity' &
          # Move all IRQs to core 0 (housekeeping core)
          for irq in /proc/irq/*/smp_affinity_list; do
            echo 0 | sudo tee "$irq" 2>/dev/null || true
          done
          # Stop noisy background services
          sudo systemctl stop irqbalance cron atd unattended-upgrades snapd 2>/dev/null || true
          # Log environment for reproducibility
          echo "=== Benchmark environment ==="
          uname -r
          lscpu | grep -E 'Model name|CPU\(s\)|MHz|NUMA'
          cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
          cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq
          cat /sys/kernel/mm/transparent_hugepage/enabled 2>/dev/null || cat /sys/kernel/mm/transparent_hugepages/enabled 2>/dev/null || echo "THP: unknown"
          free -h

      # Clean up any leftover state
      - name: Pre-flight cleanup
        run: |
          pkill -9 reth || true
          mountpoint -q "$SCHELK_MOUNT" && sudo schelk recover -y || true

      - name: Update status (running benchmarks)
        if: steps.ack.outputs.comment-id
        uses: actions/github-script@v7
        with:
          script: |
            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
            await github.rest.issues.updateComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: ${{ steps.ack.outputs.comment-id || 0 }},
              body: `🚀 Benchmark started! [View run](${runUrl})\n\n⏳ **Status:** Running benchmarks (2 runs)...`,
            });

      - name: "Run benchmark: baseline"
        run: taskset -c 0 .github/scripts/bench-reth-run.sh baseline target/profiling-baseline/reth /tmp/bench-results-baseline

      - name: "Run benchmark: branch"
        run: taskset -c 0 .github/scripts/bench-reth-run.sh branch target/profiling/reth /tmp/bench-results-branch

      # Results & charts
      - name: Parse results
        id: results
        if: success()
        env:
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
          BRANCH_SHA: ${{ github.sha }}
        run: |
          git fetch origin main --quiet
          # Use the actual PR head commit, not HEAD (which is the merge commit
          # refs/pull/N/merge and always has origin/main as a parent).
          MERGE_BASE=$(git merge-base "${BRANCH_SHA}" origin/main 2>/dev/null || echo "${{ github.sha }}")
          MAIN_HEAD=$(git rev-parse origin/main 2>/dev/null || echo "")
          BEHIND_MAIN=0
          if [ -n "$MAIN_HEAD" ] && [ "$MERGE_BASE" != "$MAIN_HEAD" ]; then
            BEHIND_MAIN=$(git rev-list --count "${MERGE_BASE}..${MAIN_HEAD}" 2>/dev/null || echo "0")
          fi

          SUMMARY_ARGS="--output-summary /tmp/bench-summary.json"
          SUMMARY_ARGS="$SUMMARY_ARGS --output-markdown /tmp/bench-comment.md"
          SUMMARY_ARGS="$SUMMARY_ARGS --repo ${{ github.repository }}"
          SUMMARY_ARGS="$SUMMARY_ARGS --baseline-ref ${MERGE_BASE}"
          SUMMARY_ARGS="$SUMMARY_ARGS --branch-name ${BRANCH_NAME}"
          SUMMARY_ARGS="$SUMMARY_ARGS --branch-sha ${BRANCH_SHA}"
          SUMMARY_ARGS="$SUMMARY_ARGS --baseline-csv /tmp/bench-results-baseline/combined_latency.csv"
          SUMMARY_ARGS="$SUMMARY_ARGS --branch-csv /tmp/bench-results-branch/combined_latency.csv"
          SUMMARY_ARGS="$SUMMARY_ARGS --gas-csv /tmp/bench-results-branch/total_gas.csv"
          if [ "$BEHIND_MAIN" -gt 0 ]; then
            SUMMARY_ARGS="$SUMMARY_ARGS --behind-main $BEHIND_MAIN"
          fi
          # shellcheck disable=SC2086
          python3 .github/scripts/bench-reth-summary.py $SUMMARY_ARGS

      - name: Generate charts
        if: success()
        env:
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
        run: |
          CHART_ARGS="/tmp/bench-results-branch/combined_latency.csv --output-dir /tmp/bench-charts"
          CHART_ARGS="$CHART_ARGS --baseline /tmp/bench-results-baseline/combined_latency.csv"
          CHART_ARGS="$CHART_ARGS --branch-name ${BRANCH_NAME}"
          # shellcheck disable=SC2086
          uv run --with matplotlib python3 .github/scripts/bench-reth-charts.py $CHART_ARGS

      - name: Upload results
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: bench-reth-results
          path: |
            /tmp/bench-results-baseline/
            /tmp/bench-results-branch/
            /tmp/bench-summary.json
            /tmp/bench-charts/

      - name: Push charts
        id: push-charts
        if: success()
        run: |
          PR_NUMBER=${{ github.event.issue.number }}
          RUN_ID=${{ github.run_id }}
          CHART_DIR="pr/${PR_NUMBER}/${RUN_ID}"

          if git fetch origin bench-charts 2>/dev/null; then
            git checkout bench-charts
          else
            git checkout --orphan bench-charts
            git rm -rf . 2>/dev/null || true
          fi

          mkdir -p "${CHART_DIR}"
          cp /tmp/bench-charts/*.png "${CHART_DIR}/"
          git add "${CHART_DIR}"
          git -c user.name="github-actions" -c user.email="github-actions@github.com" \
            commit -m "bench charts for PR #${PR_NUMBER} run ${RUN_ID}"
          git push origin bench-charts
          echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"

      - name: Compare & comment
        if: success()
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');

            let comment = '';
            try {
              comment = fs.readFileSync('/tmp/bench-comment.md', 'utf8');
            } catch (e) {
              comment = '⚠️ Engine benchmark completed but failed to generate comparison.';
            }

            const sha = '${{ steps.push-charts.outputs.sha }}';
            const prNumber = context.issue.number;
            const runId = '${{ github.run_id }}';
            const baseUrl = `https://raw.githubusercontent.com/${context.repo.owner}/${context.repo.repo}/${sha}/pr/${prNumber}/${runId}`;

            const charts = [
              { file: 'latency_throughput.png', label: 'Latency, Throughput & Diff' },
              { file: 'wait_breakdown.png', label: 'Wait Time Breakdown' },
              { file: 'gas_vs_latency.png', label: 'Gas vs Latency' },
            ];

            let chartMarkdown = '\n\n### Charts\n\n';
            for (const chart of charts) {
              chartMarkdown += `<details><summary>${chart.label}</summary>\n\n`;
              chartMarkdown += `![${chart.label}](${baseUrl}/${chart.file})\n\n`;
              chartMarkdown += `</details>\n\n`;
            }

            comment += chartMarkdown;

            const requestedBy = '${{ github.event.comment.user.login }}';
            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
            const body = `cc @${requestedBy}\n\n✅ Benchmark complete! [View run](${runUrl})\n\n${comment}`;
            const ackCommentId = '${{ steps.ack.outputs.comment-id }}';

            if (ackCommentId) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: parseInt(ackCommentId),
                body,
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body,
              });
            }

      - name: Upload node log
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: reth-node-log
          path: |
            /tmp/reth-bench-node-baseline.log
            /tmp/reth-bench-node-branch.log

      - name: Restore system settings
        if: always()
        run: |
          sudo systemctl start irqbalance cron atd 2>/dev/null || true