mirror of
https://github.com/paradigmxyz/reth.git
synced 2026-02-19 03:04:27 -05:00
ci(bench): rename main/branch to baseline/feature, add ref args (#22284)
Co-authored-by: Georgios Konstantopoulos <me@gakonst.com>
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
22
.github/scripts/bench-reth-build.sh
vendored
22
.github/scripts/bench-reth-build.sh
vendored
@@ -2,15 +2,15 @@
|
||||
#
|
||||
# Builds (or fetches from cache) reth binaries for benchmarking.
|
||||
#
|
||||
# Usage: bench-reth-build.sh <main|branch> <commit> [branch-sha]
|
||||
# Usage: bench-reth-build.sh <baseline|feature> <commit> [branch-sha]
|
||||
#
|
||||
# main — build/fetch the baseline binary at <commit> (merge-base)
|
||||
# branch — build/fetch the candidate binary + reth-bench at <commit>
|
||||
# optional branch-sha is the PR head commit for cache key
|
||||
# baseline — build/fetch the baseline binary at <commit> (merge-base)
|
||||
# feature — build/fetch the candidate binary + reth-bench at <commit>
|
||||
# optional branch-sha is the PR head commit for cache key
|
||||
#
|
||||
# Outputs:
|
||||
# main: target/profiling-baseline/reth
|
||||
# branch: target/profiling/reth, reth-bench installed to cargo bin
|
||||
# baseline: target/profiling-baseline/reth
|
||||
# feature: target/profiling/reth, reth-bench installed to cargo bin
|
||||
#
|
||||
# Required: mc (MinIO client) configured at /home/ubuntu/.mc
|
||||
set -euo pipefail
|
||||
@@ -20,16 +20,16 @@ MODE="$1"
|
||||
COMMIT="$2"
|
||||
|
||||
case "$MODE" in
|
||||
main)
|
||||
baseline|main)
|
||||
BUCKET="minio/reth-binaries/${COMMIT}"
|
||||
mkdir -p target/profiling-baseline
|
||||
|
||||
if $MC stat "${BUCKET}/reth" &>/dev/null; then
|
||||
echo "Cache hit for main (${COMMIT}), downloading binary..."
|
||||
echo "Cache hit for baseline (${COMMIT}), downloading binary..."
|
||||
$MC cp "${BUCKET}/reth" target/profiling-baseline/reth
|
||||
chmod +x target/profiling-baseline/reth
|
||||
else
|
||||
echo "Cache miss for main (${COMMIT}), building from source..."
|
||||
echo "Cache miss for baseline (${COMMIT}), building from source..."
|
||||
CURRENT_REF=$(git rev-parse HEAD)
|
||||
git checkout "${COMMIT}"
|
||||
cargo build --profile profiling --bin reth
|
||||
@@ -39,7 +39,7 @@ case "$MODE" in
|
||||
fi
|
||||
;;
|
||||
|
||||
branch)
|
||||
feature|branch)
|
||||
BRANCH_SHA="${3:-$COMMIT}"
|
||||
BUCKET="minio/reth-binaries/${BRANCH_SHA}"
|
||||
|
||||
@@ -60,7 +60,7 @@ case "$MODE" in
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Usage: $0 <main|branch> <commit> [branch-sha]"
|
||||
echo "Usage: $0 <baseline|feature> <commit> [branch-sha]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
24
.github/scripts/bench-reth-charts.py
vendored
24
.github/scripts/bench-reth-charts.py
vendored
@@ -53,7 +53,7 @@ def parse_combined_csv(path: str) -> list[dict]:
|
||||
|
||||
def plot_latency_and_throughput(
|
||||
feature: list[dict], baseline: list[dict] | None, out: Path,
|
||||
baseline_name: str = "main", branch_name: str = "branch",
|
||||
baseline_name: str = "baseline", feature_name: str = "feature",
|
||||
):
|
||||
num_plots = 3 if baseline else 2
|
||||
fig, axes = plt.subplots(num_plots, 1, figsize=(12, 4 * num_plots), sharex=True)
|
||||
@@ -76,14 +76,14 @@ def plot_latency_and_throughput(
|
||||
ax1.plot(base_x, base_lat, linewidth=0.8, label=baseline_name, alpha=0.7)
|
||||
ax2.plot(base_x, base_ggas, linewidth=0.8, label=baseline_name, alpha=0.7)
|
||||
|
||||
ax1.plot(feat_x, feat_lat, linewidth=0.8, label=branch_name)
|
||||
ax1.plot(feat_x, feat_lat, linewidth=0.8, label=feature_name)
|
||||
ax1.set_ylabel("Latency (ms)")
|
||||
ax1.set_title("newPayload Latency per Block")
|
||||
ax1.grid(True, alpha=0.3)
|
||||
if baseline:
|
||||
ax1.legend()
|
||||
|
||||
ax2.plot(feat_x, feat_ggas, linewidth=0.8, label=branch_name)
|
||||
ax2.plot(feat_x, feat_ggas, linewidth=0.8, label=feature_name)
|
||||
ax2.set_ylabel("Ggas/s")
|
||||
ax2.set_title("Execution Throughput per Block")
|
||||
ax2.grid(True, alpha=0.3)
|
||||
@@ -105,7 +105,7 @@ def plot_latency_and_throughput(
|
||||
ax3.bar(blocks, diffs, width=1.0, color=colors, alpha=0.7, edgecolor="none")
|
||||
ax3.axhline(0, color="black", linewidth=0.5)
|
||||
ax3.set_ylabel("Δ Latency (%)")
|
||||
ax3.set_title("Per-Block newPayload Latency Change (branch vs main)")
|
||||
ax3.set_title("Per-Block newPayload Latency Change (feature vs baseline)")
|
||||
ax3.grid(True, alpha=0.3, axis="y")
|
||||
|
||||
axes[-1].set_xlabel("Block Number")
|
||||
@@ -116,7 +116,7 @@ def plot_latency_and_throughput(
|
||||
|
||||
def plot_wait_breakdown(
|
||||
feature: list[dict], baseline: list[dict] | None, out: Path,
|
||||
baseline_name: str = "main", branch_name: str = "branch",
|
||||
baseline_name: str = "baseline", feature_name: str = "feature",
|
||||
):
|
||||
series = [
|
||||
("Persistence Wait", "persistence_wait_us"),
|
||||
@@ -135,7 +135,7 @@ def plot_wait_breakdown(
|
||||
fx = [r["block_number"] for r in feature if r[key] is not None]
|
||||
fy = [r[key] / 1_000 for r in feature if r[key] is not None]
|
||||
if fx:
|
||||
ax.plot(fx, fy, linewidth=0.8, label=branch_name)
|
||||
ax.plot(fx, fy, linewidth=0.8, label=feature_name)
|
||||
|
||||
ax.set_ylabel("ms")
|
||||
ax.set_title(label)
|
||||
@@ -163,7 +163,7 @@ def _add_regression(ax, x, y, color, label):
|
||||
|
||||
def plot_gas_vs_latency(
|
||||
feature: list[dict], baseline: list[dict] | None, out: Path,
|
||||
baseline_name: str = "main", branch_name: str = "branch",
|
||||
baseline_name: str = "baseline", feature_name: str = "feature",
|
||||
):
|
||||
fig, ax = plt.subplots(figsize=(8, 6))
|
||||
|
||||
@@ -176,7 +176,7 @@ def plot_gas_vs_latency(
|
||||
fgas = [r["gas_used"] / 1_000_000 for r in feature]
|
||||
flat = [r["new_payload_latency_us"] / 1_000 for r in feature]
|
||||
ax.scatter(fgas, flat, s=8, alpha=0.6)
|
||||
_add_regression(ax, fgas, flat, "tab:orange", branch_name)
|
||||
_add_regression(ax, fgas, flat, "tab:orange", feature_name)
|
||||
|
||||
ax.set_xlabel("Gas Used (Mgas)")
|
||||
ax.set_ylabel("newPayload Latency (ms)")
|
||||
@@ -195,10 +195,10 @@ def main():
|
||||
"--output-dir", required=True, help="Output directory for PNG charts"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--baseline", help="Path to baseline (main) combined_latency.csv"
|
||||
"--baseline", help="Path to baseline combined_latency.csv"
|
||||
)
|
||||
parser.add_argument("--baseline-name", default="main", help="Label for baseline")
|
||||
parser.add_argument("--branch-name", default="branch", help="Label for branch")
|
||||
parser.add_argument("--baseline-name", default="baseline", help="Label for baseline")
|
||||
parser.add_argument("--feature-name", "--branch-name", default="feature", help="Label for feature")
|
||||
args = parser.parse_args()
|
||||
|
||||
feature = parse_combined_csv(args.combined_csv)
|
||||
@@ -220,7 +220,7 @@ def main():
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
bname = args.baseline_name
|
||||
fname = args.branch_name
|
||||
fname = args.feature_name
|
||||
plot_latency_and_throughput(feature, baseline, out_dir / "latency_throughput.png", bname, fname)
|
||||
plot_wait_breakdown(feature, baseline, out_dir / "wait_breakdown.png", bname, fname)
|
||||
plot_gas_vs_latency(feature, baseline, out_dir / "gas_vs_latency.png", bname, fname)
|
||||
|
||||
104
.github/scripts/bench-reth-summary.py
vendored
104
.github/scripts/bench-reth-summary.py
vendored
@@ -8,12 +8,12 @@ Usage:
|
||||
--baseline-csv <baseline_combined.csv> \
|
||||
[--repo <owner/repo>] \
|
||||
[--baseline-ref <sha>] \
|
||||
[--branch-name <name>] \
|
||||
[--branch-sha <sha>]
|
||||
[--feature-name <name>] \
|
||||
[--feature-sha <sha>]
|
||||
|
||||
Generates a paired statistical comparison between baseline (main) and branch.
|
||||
Generates a paired statistical comparison between baseline and feature.
|
||||
Matches blocks by number and computes per-block diffs to cancel out gas
|
||||
variance. Fails if baseline or branch CSV is missing or empty.
|
||||
variance. Fails if baseline or feature CSV is missing or empty.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -113,25 +113,25 @@ def compute_stats(combined: list[dict]) -> dict:
|
||||
|
||||
|
||||
def _paired_data(
|
||||
baseline: list[dict], branch: list[dict]
|
||||
baseline: list[dict], feature: list[dict]
|
||||
) -> tuple[list[tuple[float, float]], list[float], list[float]]:
|
||||
"""Match blocks and return paired latencies and per-block diffs.
|
||||
|
||||
Returns:
|
||||
pairs: list of (baseline_ms, branch_ms) tuples
|
||||
lat_diffs_ms: list of branch − baseline latency diffs in ms
|
||||
mgas_diffs: list of branch − baseline Mgas/s diffs
|
||||
pairs: list of (baseline_ms, feature_ms) tuples
|
||||
lat_diffs_ms: list of feature − baseline latency diffs in ms
|
||||
mgas_diffs: list of feature − baseline Mgas/s diffs
|
||||
"""
|
||||
baseline_by_block = {r["block_number"]: r for r in baseline}
|
||||
branch_by_block = {r["block_number"]: r for r in branch}
|
||||
common_blocks = sorted(set(baseline_by_block) & set(branch_by_block))
|
||||
feature_by_block = {r["block_number"]: r for r in feature}
|
||||
common_blocks = sorted(set(baseline_by_block) & set(feature_by_block))
|
||||
|
||||
pairs = []
|
||||
lat_diffs_ms = []
|
||||
mgas_diffs = []
|
||||
for bn in common_blocks:
|
||||
b = baseline_by_block[bn]
|
||||
f = branch_by_block[bn]
|
||||
f = feature_by_block[bn]
|
||||
b_ms = b["new_payload_latency_us"] / 1_000
|
||||
f_ms = f["new_payload_latency_us"] / 1_000
|
||||
pairs.append((b_ms, f_ms))
|
||||
@@ -148,18 +148,18 @@ def _paired_data(
|
||||
|
||||
def compute_paired_stats(
|
||||
baseline_runs: list[list[dict]],
|
||||
branch_runs: list[list[dict]],
|
||||
feature_runs: list[list[dict]],
|
||||
) -> dict:
|
||||
"""Compute paired statistics between baseline and branch runs.
|
||||
"""Compute paired statistics between baseline and feature runs.
|
||||
|
||||
Each pair (baseline_runs[i], branch_runs[i]) produces per-block diffs.
|
||||
Each pair (baseline_runs[i], feature_runs[i]) produces per-block diffs.
|
||||
All diffs are pooled for the final CI.
|
||||
"""
|
||||
all_pairs = []
|
||||
all_lat_diffs = []
|
||||
all_mgas_diffs = []
|
||||
for baseline, branch in zip(baseline_runs, branch_runs):
|
||||
pairs, lat_diffs, mgas_diffs = _paired_data(baseline, branch)
|
||||
for baseline, feature in zip(baseline_runs, feature_runs):
|
||||
pairs, lat_diffs, mgas_diffs = _paired_data(baseline, feature)
|
||||
all_pairs.extend(pairs)
|
||||
all_lat_diffs.extend(lat_diffs)
|
||||
all_mgas_diffs.extend(mgas_diffs)
|
||||
@@ -175,10 +175,10 @@ def compute_paired_stats(
|
||||
|
||||
# Bootstrap CI on difference-of-percentiles (resample paired blocks)
|
||||
base_lats = sorted([p[0] for p in all_pairs])
|
||||
branch_lats = sorted([p[1] for p in all_pairs])
|
||||
p50_diff = percentile(branch_lats, 50) - percentile(base_lats, 50)
|
||||
p90_diff = percentile(branch_lats, 90) - percentile(base_lats, 90)
|
||||
p99_diff = percentile(branch_lats, 99) - percentile(base_lats, 99)
|
||||
feature_lats = sorted([p[1] for p in all_pairs])
|
||||
p50_diff = percentile(feature_lats, 50) - percentile(base_lats, 50)
|
||||
p90_diff = percentile(feature_lats, 90) - percentile(base_lats, 90)
|
||||
p99_diff = percentile(feature_lats, 99) - percentile(base_lats, 99)
|
||||
|
||||
rng = random.Random(42)
|
||||
p50_boot, p90_boot, p99_boot = [], [], []
|
||||
@@ -268,10 +268,11 @@ def generate_comparison_table(
|
||||
paired: dict,
|
||||
repo: str,
|
||||
baseline_ref: str,
|
||||
branch_name: str,
|
||||
branch_sha: str,
|
||||
baseline_name: str,
|
||||
feature_name: str,
|
||||
feature_sha: str,
|
||||
) -> str:
|
||||
"""Generate a markdown comparison table between baseline (main) and branch."""
|
||||
"""Generate a markdown comparison table between baseline and feature."""
|
||||
n = paired["n"]
|
||||
|
||||
def pct(base: float, feat: float) -> float:
|
||||
@@ -294,11 +295,11 @@ def generate_comparison_table(
|
||||
mgas_ci_pct = paired["mgas_ci"] / run1["mean_mgas_s"] * 100.0 if run1["mean_mgas_s"] > 0 else 0.0
|
||||
|
||||
base_url = f"https://github.com/{repo}/commit"
|
||||
baseline_label = f"[`main`]({base_url}/{baseline_ref})"
|
||||
branch_label = f"[`{branch_name}`]({base_url}/{branch_sha})"
|
||||
baseline_label = f"[`{baseline_name}`]({base_url}/{baseline_ref})"
|
||||
feature_label = f"[`{feature_name}`]({base_url}/{feature_sha})"
|
||||
|
||||
lines = [
|
||||
f"| Metric | {baseline_label} | {branch_label} | Change |",
|
||||
f"| Metric | {baseline_label} | {feature_label} | Change |",
|
||||
"|--------|------|--------|--------|",
|
||||
f"| Mean | {fmt_ms(run1['mean_ms'])} | {fmt_ms(run2['mean_ms'])} | {change_str(mean_pct, lat_ci_pct, lower_is_better=True)} |",
|
||||
f"| StdDev | {fmt_ms(run1['stddev_ms'])} | {fmt_ms(run2['stddev_ms'])} | |",
|
||||
@@ -314,15 +315,15 @@ def generate_comparison_table(
|
||||
|
||||
def generate_markdown(
|
||||
summary: dict, comparison_table: str,
|
||||
behind_main: int = 0, repo: str = "", baseline_ref: str = "",
|
||||
behind_baseline: int = 0, repo: str = "", baseline_ref: str = "", baseline_name: str = "",
|
||||
) -> str:
|
||||
"""Generate a markdown comment body."""
|
||||
lines = ["## Benchmark Results", "", comparison_table]
|
||||
if behind_main > 0:
|
||||
s = "s" if behind_main > 1 else ""
|
||||
diff_link = f"https://github.com/{repo}/compare/{baseline_ref[:12]}...main"
|
||||
if behind_baseline > 0:
|
||||
s = "s" if behind_baseline > 1 else ""
|
||||
diff_link = f"https://github.com/{repo}/compare/{baseline_ref[:12]}...{baseline_name}"
|
||||
lines.append("")
|
||||
lines.append(f"> ⚠️ Branch is [**{behind_main} commit{s} behind `main`**]({diff_link}). Consider rebasing for accurate results.")
|
||||
lines.append(f"> ⚠️ Feature is [**{behind_baseline} commit{s} behind `{baseline_name}`**]({diff_link}). Consider rebasing for accurate results.")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
@@ -333,8 +334,8 @@ def main():
|
||||
help="Baseline combined_latency.csv files (A1, A2)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--branch-csv", nargs="+", required=True,
|
||||
help="Branch combined_latency.csv files (B1, B2)",
|
||||
"--feature-csv", "--branch-csv", nargs="+", required=True,
|
||||
help="Feature combined_latency.csv files (B1, B2)",
|
||||
)
|
||||
parser.add_argument("--gas-csv", required=True, help="Path to total_gas.csv")
|
||||
parser.add_argument(
|
||||
@@ -345,65 +346,68 @@ def main():
|
||||
"--repo", default="paradigmxyz/reth", help="GitHub repo (owner/name)"
|
||||
)
|
||||
parser.add_argument("--baseline-ref", default=None, help="Baseline commit SHA")
|
||||
parser.add_argument("--branch-name", default=None, help="Branch name")
|
||||
parser.add_argument("--branch-sha", default=None, help="Branch commit SHA")
|
||||
parser.add_argument("--behind-main", type=int, default=0, help="Commits behind main")
|
||||
parser.add_argument("--baseline-name", default=None, help="Baseline display name")
|
||||
parser.add_argument("--feature-name", "--branch-name", default=None, help="Feature branch name")
|
||||
parser.add_argument("--feature-ref", "--branch-sha", "--feature-sha", default=None, help="Feature commit SHA")
|
||||
parser.add_argument("--behind-baseline", "--behind-main", type=int, default=0, help="Commits behind baseline")
|
||||
args = parser.parse_args()
|
||||
|
||||
if len(args.baseline_csv) != len(args.branch_csv):
|
||||
print("Must provide equal number of baseline and branch CSVs", file=sys.stderr)
|
||||
if len(args.baseline_csv) != len(args.feature_csv):
|
||||
print("Must provide equal number of baseline and feature CSVs", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
baseline_runs = []
|
||||
branch_runs = []
|
||||
feature_runs = []
|
||||
for path in args.baseline_csv:
|
||||
data = parse_combined_csv(path)
|
||||
if not data:
|
||||
print(f"No results in {path}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
baseline_runs.append(data)
|
||||
for path in args.branch_csv:
|
||||
for path in args.feature_csv:
|
||||
data = parse_combined_csv(path)
|
||||
if not data:
|
||||
print(f"No results in {path}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
branch_runs.append(data)
|
||||
feature_runs.append(data)
|
||||
|
||||
gas = parse_gas_csv(args.gas_csv)
|
||||
|
||||
all_baseline = [r for run in baseline_runs for r in run]
|
||||
all_branch = [r for run in branch_runs for r in run]
|
||||
all_feature = [r for run in feature_runs for r in run]
|
||||
|
||||
summary = compute_summary(all_branch, gas)
|
||||
summary = compute_summary(all_feature, gas)
|
||||
with open(args.output_summary, "w") as f:
|
||||
json.dump(summary, f, indent=2)
|
||||
print(f"Summary written to {args.output_summary}")
|
||||
|
||||
baseline_stats = compute_stats(all_baseline)
|
||||
branch_stats = compute_stats(all_branch)
|
||||
paired_stats = compute_paired_stats(baseline_runs, branch_runs)
|
||||
feature_stats = compute_stats(all_feature)
|
||||
paired_stats = compute_paired_stats(baseline_runs, feature_runs)
|
||||
|
||||
if not paired_stats:
|
||||
print("No common blocks between baseline and branch runs", file=sys.stderr)
|
||||
print("No common blocks between baseline and feature runs", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
comparison_table = generate_comparison_table(
|
||||
baseline_stats,
|
||||
branch_stats,
|
||||
feature_stats,
|
||||
paired_stats,
|
||||
repo=args.repo,
|
||||
baseline_ref=args.baseline_ref or "main",
|
||||
branch_name=args.branch_name or "branch",
|
||||
branch_sha=args.branch_sha or "unknown",
|
||||
baseline_name=args.baseline_name or "baseline",
|
||||
feature_name=args.feature_name or "feature",
|
||||
feature_sha=args.feature_ref or "unknown",
|
||||
)
|
||||
print(f"Generated comparison ({paired_stats['n']} paired blocks, "
|
||||
f"mean diff {paired_stats['mean_diff_ms']:+.3f}ms ± {paired_stats['ci_ms']:.3f}ms)")
|
||||
|
||||
markdown = generate_markdown(
|
||||
summary, comparison_table,
|
||||
behind_main=args.behind_main,
|
||||
behind_baseline=args.behind_baseline,
|
||||
repo=args.repo,
|
||||
baseline_ref=args.baseline_ref or "",
|
||||
baseline_name=args.baseline_name or "main",
|
||||
)
|
||||
|
||||
with open(args.output_markdown, "w") as f:
|
||||
|
||||
135
.github/workflows/bench.yml
vendored
135
.github/workflows/bench.yml
vendored
@@ -3,7 +3,7 @@
|
||||
# The reth-bench job replays real blocks via the Engine API against a reth node
|
||||
# backed by a local snapshot managed with schelk.
|
||||
#
|
||||
# It runs the main (baseline) binary and the branch (candidate) binary on the
|
||||
# It runs the baseline binary and the feature (candidate) binary on the
|
||||
# same block range (snapshot recovered between runs) to compare performance.
|
||||
|
||||
on:
|
||||
@@ -108,8 +108,9 @@ jobs:
|
||||
with:
|
||||
script: |
|
||||
const body = context.payload.comment.body.trim();
|
||||
const known = new Set(['blocks', 'warmup']);
|
||||
const defaults = { blocks: '500', warmup: '100' };
|
||||
const intArgs = new Set(['blocks', 'warmup']);
|
||||
const refArgs = new Set(['baseline', 'feature']);
|
||||
const defaults = { blocks: '500', warmup: '100', baseline: '', feature: '' };
|
||||
const unknown = [];
|
||||
const invalid = [];
|
||||
const args = body.replace(/^derek bench\s*/, '');
|
||||
@@ -121,19 +122,27 @@ jobs:
|
||||
}
|
||||
const key = part.slice(0, eq);
|
||||
const value = part.slice(eq + 1);
|
||||
if (!known.has(key)) {
|
||||
unknown.push(key);
|
||||
} else if (!/^\d+$/.test(value)) {
|
||||
invalid.push(`\`${key}=${value}\` (must be a positive integer)`);
|
||||
if (intArgs.has(key)) {
|
||||
if (!/^\d+$/.test(value)) {
|
||||
invalid.push(`\`${key}=${value}\` (must be a positive integer)`);
|
||||
} else {
|
||||
defaults[key] = value;
|
||||
}
|
||||
} else if (refArgs.has(key)) {
|
||||
if (!value) {
|
||||
invalid.push(`\`${key}=\` (must be a git ref)`);
|
||||
} else {
|
||||
defaults[key] = value;
|
||||
}
|
||||
} else {
|
||||
defaults[key] = value;
|
||||
unknown.push(key);
|
||||
}
|
||||
}
|
||||
const errors = [];
|
||||
if (unknown.length) errors.push(`Unknown argument(s): \`${unknown.join('`, `')}\``);
|
||||
if (invalid.length) errors.push(`Invalid value(s): ${invalid.join(', ')}`);
|
||||
if (errors.length) {
|
||||
const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`derek bench [blocks=N] [warmup=N]\``;
|
||||
const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`derek bench [blocks=N] [warmup=N] [baseline=REF] [feature=REF]\``;
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
@@ -145,6 +154,8 @@ jobs:
|
||||
}
|
||||
core.setOutput('blocks', defaults.blocks);
|
||||
core.setOutput('warmup', defaults.warmup);
|
||||
core.setOutput('baseline', defaults.baseline);
|
||||
core.setOutput('feature', defaults.feature);
|
||||
core.exportVariable('BENCH_BLOCKS', defaults.blocks);
|
||||
core.exportVariable('BENCH_WARMUP_BLOCKS', defaults.warmup);
|
||||
|
||||
@@ -163,11 +174,13 @@ jobs:
|
||||
const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
|
||||
const blocks = '${{ steps.args.outputs.blocks }}';
|
||||
const warmup = '${{ steps.args.outputs.warmup }}';
|
||||
const baseline = '${{ steps.args.outputs.baseline }}' || 'merge-base';
|
||||
const feature = '${{ steps.args.outputs.feature }}' || 'PR head';
|
||||
const { data: comment } = await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: `🚀 Benchmark started! [View run](${runUrl})\n\n⏳ **Status:** Building binaries...\n\n**Config:** ${blocks} blocks, ${warmup} warmup blocks`,
|
||||
body: `🚀 Benchmark started! [View run](${runUrl})\n\n⏳ **Status:** Building binaries...\n\n**Config:** ${blocks} blocks, ${warmup} warmup blocks, baseline: \`${baseline}\`, feature: \`${feature}\``,
|
||||
});
|
||||
core.setOutput('comment-id', comment.id);
|
||||
- uses: actions/checkout@v6
|
||||
@@ -197,14 +210,52 @@ jobs:
|
||||
echo "All dependencies found"
|
||||
|
||||
# Build binaries
|
||||
- name: Fetch or build main binaries
|
||||
- name: Resolve PR head branch
|
||||
id: pr-info
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const { data: pr } = await github.rest.pulls.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: context.issue.number,
|
||||
});
|
||||
core.setOutput('head-ref', pr.head.ref);
|
||||
core.setOutput('head-sha', pr.head.sha);
|
||||
|
||||
- name: Resolve refs
|
||||
id: refs
|
||||
run: |
|
||||
MERGE_BASE=$(git merge-base HEAD origin/main 2>/dev/null || echo "${{ github.sha }}")
|
||||
.github/scripts/bench-reth-build.sh main "$MERGE_BASE"
|
||||
- name: Fetch or build branch binaries
|
||||
run: |
|
||||
BRANCH_SHA="${{ github.sha }}"
|
||||
.github/scripts/bench-reth-build.sh branch "$BRANCH_SHA"
|
||||
BASELINE_ARG="${{ steps.args.outputs.baseline }}"
|
||||
FEATURE_ARG="${{ steps.args.outputs.feature }}"
|
||||
|
||||
if [ -n "$BASELINE_ARG" ]; then
|
||||
git fetch origin "$BASELINE_ARG" --quiet 2>/dev/null || true
|
||||
BASELINE_REF=$(git rev-parse "$BASELINE_ARG" 2>/dev/null || git rev-parse "origin/$BASELINE_ARG" 2>/dev/null)
|
||||
BASELINE_NAME="$BASELINE_ARG"
|
||||
else
|
||||
BASELINE_REF=$(git merge-base HEAD origin/main 2>/dev/null || echo "${{ github.sha }}")
|
||||
BASELINE_NAME="main"
|
||||
fi
|
||||
|
||||
if [ -n "$FEATURE_ARG" ]; then
|
||||
git fetch origin "$FEATURE_ARG" --quiet 2>/dev/null || true
|
||||
FEATURE_REF=$(git rev-parse "$FEATURE_ARG" 2>/dev/null || git rev-parse "origin/$FEATURE_ARG" 2>/dev/null)
|
||||
FEATURE_NAME="$FEATURE_ARG"
|
||||
else
|
||||
FEATURE_REF="${{ steps.pr-info.outputs.head-sha }}"
|
||||
FEATURE_NAME="${{ steps.pr-info.outputs.head-ref }}"
|
||||
fi
|
||||
|
||||
echo "baseline-ref=$BASELINE_REF" >> "$GITHUB_OUTPUT"
|
||||
echo "baseline-name=$BASELINE_NAME" >> "$GITHUB_OUTPUT"
|
||||
echo "feature-ref=$FEATURE_REF" >> "$GITHUB_OUTPUT"
|
||||
echo "feature-name=$FEATURE_NAME" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Fetch or build baseline binaries
|
||||
run: .github/scripts/bench-reth-build.sh baseline "${{ steps.refs.outputs.baseline-ref }}"
|
||||
- name: Fetch or build feature binaries
|
||||
run: .github/scripts/bench-reth-build.sh feature "${{ steps.refs.outputs.feature-ref }}"
|
||||
|
||||
# System tuning for reproducible benchmarks
|
||||
- name: System setup
|
||||
@@ -267,38 +318,38 @@ jobs:
|
||||
- name: "Run benchmark: baseline"
|
||||
run: taskset -c 0 .github/scripts/bench-reth-run.sh baseline target/profiling-baseline/reth /tmp/bench-results-baseline
|
||||
|
||||
- name: "Run benchmark: branch"
|
||||
run: taskset -c 0 .github/scripts/bench-reth-run.sh branch target/profiling/reth /tmp/bench-results-branch
|
||||
- name: "Run benchmark: feature"
|
||||
run: taskset -c 0 .github/scripts/bench-reth-run.sh feature target/profiling/reth /tmp/bench-results-feature
|
||||
|
||||
# Results & charts
|
||||
- name: Parse results
|
||||
id: results
|
||||
if: success()
|
||||
env:
|
||||
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||
BRANCH_SHA: ${{ github.sha }}
|
||||
BASELINE_REF: ${{ steps.refs.outputs.baseline-ref }}
|
||||
BASELINE_NAME: ${{ steps.refs.outputs.baseline-name }}
|
||||
FEATURE_NAME: ${{ steps.refs.outputs.feature-name }}
|
||||
FEATURE_REF: ${{ steps.refs.outputs.feature-ref }}
|
||||
run: |
|
||||
git fetch origin main --quiet
|
||||
# Use the actual PR head commit, not HEAD (which is the merge commit
|
||||
# refs/pull/N/merge and always has origin/main as a parent).
|
||||
MERGE_BASE=$(git merge-base "${BRANCH_SHA}" origin/main 2>/dev/null || echo "${{ github.sha }}")
|
||||
MAIN_HEAD=$(git rev-parse origin/main 2>/dev/null || echo "")
|
||||
BEHIND_MAIN=0
|
||||
if [ -n "$MAIN_HEAD" ] && [ "$MERGE_BASE" != "$MAIN_HEAD" ]; then
|
||||
BEHIND_MAIN=$(git rev-list --count "${MERGE_BASE}..${MAIN_HEAD}" 2>/dev/null || echo "0")
|
||||
git fetch origin "${BASELINE_NAME}" --quiet 2>/dev/null || true
|
||||
BASELINE_HEAD=$(git rev-parse "origin/${BASELINE_NAME}" 2>/dev/null || echo "")
|
||||
BEHIND_BASELINE=0
|
||||
if [ -n "$BASELINE_HEAD" ] && [ "$BASELINE_REF" != "$BASELINE_HEAD" ]; then
|
||||
BEHIND_BASELINE=$(git rev-list --count "${BASELINE_REF}..${BASELINE_HEAD}" 2>/dev/null || echo "0")
|
||||
fi
|
||||
|
||||
SUMMARY_ARGS="--output-summary /tmp/bench-summary.json"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --output-markdown /tmp/bench-comment.md"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --repo ${{ github.repository }}"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --baseline-ref ${MERGE_BASE}"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --branch-name ${BRANCH_NAME}"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --branch-sha ${BRANCH_SHA}"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --baseline-ref ${BASELINE_REF}"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --baseline-name ${BASELINE_NAME}"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --feature-name ${FEATURE_NAME}"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --feature-ref ${FEATURE_REF}"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --baseline-csv /tmp/bench-results-baseline/combined_latency.csv"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --branch-csv /tmp/bench-results-branch/combined_latency.csv"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --gas-csv /tmp/bench-results-branch/total_gas.csv"
|
||||
if [ "$BEHIND_MAIN" -gt 0 ]; then
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --behind-main $BEHIND_MAIN"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --feature-csv /tmp/bench-results-feature/combined_latency.csv"
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --gas-csv /tmp/bench-results-feature/total_gas.csv"
|
||||
if [ "$BEHIND_BASELINE" -gt 0 ]; then
|
||||
SUMMARY_ARGS="$SUMMARY_ARGS --behind-baseline $BEHIND_BASELINE"
|
||||
fi
|
||||
# shellcheck disable=SC2086
|
||||
python3 .github/scripts/bench-reth-summary.py $SUMMARY_ARGS
|
||||
@@ -306,11 +357,13 @@ jobs:
|
||||
- name: Generate charts
|
||||
if: success()
|
||||
env:
|
||||
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||
BASELINE_NAME: ${{ steps.refs.outputs.baseline-name }}
|
||||
FEATURE_NAME: ${{ steps.refs.outputs.feature-name }}
|
||||
run: |
|
||||
CHART_ARGS="/tmp/bench-results-branch/combined_latency.csv --output-dir /tmp/bench-charts"
|
||||
CHART_ARGS="/tmp/bench-results-feature/combined_latency.csv --output-dir /tmp/bench-charts"
|
||||
CHART_ARGS="$CHART_ARGS --baseline /tmp/bench-results-baseline/combined_latency.csv"
|
||||
CHART_ARGS="$CHART_ARGS --branch-name ${BRANCH_NAME}"
|
||||
CHART_ARGS="$CHART_ARGS --baseline-name ${BASELINE_NAME}"
|
||||
CHART_ARGS="$CHART_ARGS --feature-name ${FEATURE_NAME}"
|
||||
# shellcheck disable=SC2086
|
||||
uv run --with matplotlib python3 .github/scripts/bench-reth-charts.py $CHART_ARGS
|
||||
|
||||
@@ -321,7 +374,7 @@ jobs:
|
||||
name: bench-reth-results
|
||||
path: |
|
||||
/tmp/bench-results-baseline/
|
||||
/tmp/bench-results-branch/
|
||||
/tmp/bench-results-feature/
|
||||
/tmp/bench-summary.json
|
||||
/tmp/bench-charts/
|
||||
|
||||
@@ -410,7 +463,7 @@ jobs:
|
||||
name: reth-node-log
|
||||
path: |
|
||||
/tmp/reth-bench-node-baseline.log
|
||||
/tmp/reth-bench-node-branch.log
|
||||
/tmp/reth-bench-node-feature.log
|
||||
|
||||
- name: Restore system settings
|
||||
if: always()
|
||||
|
||||
Reference in New Issue
Block a user