ci(bench): rename main/branch to baseline/feature, add ref args (#22284)

Co-authored-by: Georgios Konstantopoulos <me@gakonst.com>
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Alexey Shekhirin
2026-02-17 23:00:01 +00:00
committed by GitHub
parent aeb2c6e731
commit b49cadb346
4 changed files with 171 additions and 114 deletions

View File

@@ -2,15 +2,15 @@
#
# Builds (or fetches from cache) reth binaries for benchmarking.
#
# Usage: bench-reth-build.sh <main|branch> <commit> [branch-sha]
# Usage: bench-reth-build.sh <baseline|feature> <commit> [branch-sha]
#
# main — build/fetch the baseline binary at <commit> (merge-base)
# branch — build/fetch the candidate binary + reth-bench at <commit>
# optional branch-sha is the PR head commit for cache key
# baseline — build/fetch the baseline binary at <commit> (merge-base)
# feature — build/fetch the candidate binary + reth-bench at <commit>
# optional branch-sha is the PR head commit for cache key
#
# Outputs:
# main: target/profiling-baseline/reth
# branch: target/profiling/reth, reth-bench installed to cargo bin
# baseline: target/profiling-baseline/reth
# feature: target/profiling/reth, reth-bench installed to cargo bin
#
# Required: mc (MinIO client) configured at /home/ubuntu/.mc
set -euo pipefail
@@ -20,16 +20,16 @@ MODE="$1"
COMMIT="$2"
case "$MODE" in
main)
baseline|main)
BUCKET="minio/reth-binaries/${COMMIT}"
mkdir -p target/profiling-baseline
if $MC stat "${BUCKET}/reth" &>/dev/null; then
echo "Cache hit for main (${COMMIT}), downloading binary..."
echo "Cache hit for baseline (${COMMIT}), downloading binary..."
$MC cp "${BUCKET}/reth" target/profiling-baseline/reth
chmod +x target/profiling-baseline/reth
else
echo "Cache miss for main (${COMMIT}), building from source..."
echo "Cache miss for baseline (${COMMIT}), building from source..."
CURRENT_REF=$(git rev-parse HEAD)
git checkout "${COMMIT}"
cargo build --profile profiling --bin reth
@@ -39,7 +39,7 @@ case "$MODE" in
fi
;;
branch)
feature|branch)
BRANCH_SHA="${3:-$COMMIT}"
BUCKET="minio/reth-binaries/${BRANCH_SHA}"
@@ -60,7 +60,7 @@ case "$MODE" in
;;
*)
echo "Usage: $0 <main|branch> <commit> [branch-sha]"
echo "Usage: $0 <baseline|feature> <commit> [branch-sha]"
exit 1
;;
esac

View File

@@ -53,7 +53,7 @@ def parse_combined_csv(path: str) -> list[dict]:
def plot_latency_and_throughput(
feature: list[dict], baseline: list[dict] | None, out: Path,
baseline_name: str = "main", branch_name: str = "branch",
baseline_name: str = "baseline", feature_name: str = "feature",
):
num_plots = 3 if baseline else 2
fig, axes = plt.subplots(num_plots, 1, figsize=(12, 4 * num_plots), sharex=True)
@@ -76,14 +76,14 @@ def plot_latency_and_throughput(
ax1.plot(base_x, base_lat, linewidth=0.8, label=baseline_name, alpha=0.7)
ax2.plot(base_x, base_ggas, linewidth=0.8, label=baseline_name, alpha=0.7)
ax1.plot(feat_x, feat_lat, linewidth=0.8, label=branch_name)
ax1.plot(feat_x, feat_lat, linewidth=0.8, label=feature_name)
ax1.set_ylabel("Latency (ms)")
ax1.set_title("newPayload Latency per Block")
ax1.grid(True, alpha=0.3)
if baseline:
ax1.legend()
ax2.plot(feat_x, feat_ggas, linewidth=0.8, label=branch_name)
ax2.plot(feat_x, feat_ggas, linewidth=0.8, label=feature_name)
ax2.set_ylabel("Ggas/s")
ax2.set_title("Execution Throughput per Block")
ax2.grid(True, alpha=0.3)
@@ -105,7 +105,7 @@ def plot_latency_and_throughput(
ax3.bar(blocks, diffs, width=1.0, color=colors, alpha=0.7, edgecolor="none")
ax3.axhline(0, color="black", linewidth=0.5)
ax3.set_ylabel("Δ Latency (%)")
ax3.set_title("Per-Block newPayload Latency Change (branch vs main)")
ax3.set_title("Per-Block newPayload Latency Change (feature vs baseline)")
ax3.grid(True, alpha=0.3, axis="y")
axes[-1].set_xlabel("Block Number")
@@ -116,7 +116,7 @@ def plot_latency_and_throughput(
def plot_wait_breakdown(
feature: list[dict], baseline: list[dict] | None, out: Path,
baseline_name: str = "main", branch_name: str = "branch",
baseline_name: str = "baseline", feature_name: str = "feature",
):
series = [
("Persistence Wait", "persistence_wait_us"),
@@ -135,7 +135,7 @@ def plot_wait_breakdown(
fx = [r["block_number"] for r in feature if r[key] is not None]
fy = [r[key] / 1_000 for r in feature if r[key] is not None]
if fx:
ax.plot(fx, fy, linewidth=0.8, label=branch_name)
ax.plot(fx, fy, linewidth=0.8, label=feature_name)
ax.set_ylabel("ms")
ax.set_title(label)
@@ -163,7 +163,7 @@ def _add_regression(ax, x, y, color, label):
def plot_gas_vs_latency(
feature: list[dict], baseline: list[dict] | None, out: Path,
baseline_name: str = "main", branch_name: str = "branch",
baseline_name: str = "baseline", feature_name: str = "feature",
):
fig, ax = plt.subplots(figsize=(8, 6))
@@ -176,7 +176,7 @@ def plot_gas_vs_latency(
fgas = [r["gas_used"] / 1_000_000 for r in feature]
flat = [r["new_payload_latency_us"] / 1_000 for r in feature]
ax.scatter(fgas, flat, s=8, alpha=0.6)
_add_regression(ax, fgas, flat, "tab:orange", branch_name)
_add_regression(ax, fgas, flat, "tab:orange", feature_name)
ax.set_xlabel("Gas Used (Mgas)")
ax.set_ylabel("newPayload Latency (ms)")
@@ -195,10 +195,10 @@ def main():
"--output-dir", required=True, help="Output directory for PNG charts"
)
parser.add_argument(
"--baseline", help="Path to baseline (main) combined_latency.csv"
"--baseline", help="Path to baseline combined_latency.csv"
)
parser.add_argument("--baseline-name", default="main", help="Label for baseline")
parser.add_argument("--branch-name", default="branch", help="Label for branch")
parser.add_argument("--baseline-name", default="baseline", help="Label for baseline")
parser.add_argument("--feature-name", "--branch-name", default="feature", help="Label for feature")
args = parser.parse_args()
feature = parse_combined_csv(args.combined_csv)
@@ -220,7 +220,7 @@ def main():
out_dir.mkdir(parents=True, exist_ok=True)
bname = args.baseline_name
fname = args.branch_name
fname = args.feature_name
plot_latency_and_throughput(feature, baseline, out_dir / "latency_throughput.png", bname, fname)
plot_wait_breakdown(feature, baseline, out_dir / "wait_breakdown.png", bname, fname)
plot_gas_vs_latency(feature, baseline, out_dir / "gas_vs_latency.png", bname, fname)

View File

@@ -8,12 +8,12 @@ Usage:
--baseline-csv <baseline_combined.csv> \
[--repo <owner/repo>] \
[--baseline-ref <sha>] \
[--branch-name <name>] \
[--branch-sha <sha>]
[--feature-name <name>] \
[--feature-ref <sha>]
Generates a paired statistical comparison between baseline (main) and branch.
Generates a paired statistical comparison between baseline and feature.
Matches blocks by number and computes per-block diffs to cancel out gas
variance. Fails if baseline or branch CSV is missing or empty.
variance. Fails if baseline or feature CSV is missing or empty.
"""
import argparse
@@ -113,25 +113,25 @@ def compute_stats(combined: list[dict]) -> dict:
def _paired_data(
baseline: list[dict], branch: list[dict]
baseline: list[dict], feature: list[dict]
) -> tuple[list[tuple[float, float]], list[float], list[float]]:
"""Match blocks and return paired latencies and per-block diffs.
Returns:
pairs: list of (baseline_ms, branch_ms) tuples
lat_diffs_ms: list of (branch - baseline) latency diffs in ms
mgas_diffs: list of (branch - baseline) Mgas/s diffs
pairs: list of (baseline_ms, feature_ms) tuples
lat_diffs_ms: list of (feature - baseline) latency diffs in ms
mgas_diffs: list of (feature - baseline) Mgas/s diffs
"""
baseline_by_block = {r["block_number"]: r for r in baseline}
branch_by_block = {r["block_number"]: r for r in branch}
common_blocks = sorted(set(baseline_by_block) & set(branch_by_block))
feature_by_block = {r["block_number"]: r for r in feature}
common_blocks = sorted(set(baseline_by_block) & set(feature_by_block))
pairs = []
lat_diffs_ms = []
mgas_diffs = []
for bn in common_blocks:
b = baseline_by_block[bn]
f = branch_by_block[bn]
f = feature_by_block[bn]
b_ms = b["new_payload_latency_us"] / 1_000
f_ms = f["new_payload_latency_us"] / 1_000
pairs.append((b_ms, f_ms))
@@ -148,18 +148,18 @@ def _paired_data(
def compute_paired_stats(
baseline_runs: list[list[dict]],
branch_runs: list[list[dict]],
feature_runs: list[list[dict]],
) -> dict:
"""Compute paired statistics between baseline and branch runs.
"""Compute paired statistics between baseline and feature runs.
Each pair (baseline_runs[i], branch_runs[i]) produces per-block diffs.
Each pair (baseline_runs[i], feature_runs[i]) produces per-block diffs.
All diffs are pooled for the final CI.
"""
all_pairs = []
all_lat_diffs = []
all_mgas_diffs = []
for baseline, branch in zip(baseline_runs, branch_runs):
pairs, lat_diffs, mgas_diffs = _paired_data(baseline, branch)
for baseline, feature in zip(baseline_runs, feature_runs):
pairs, lat_diffs, mgas_diffs = _paired_data(baseline, feature)
all_pairs.extend(pairs)
all_lat_diffs.extend(lat_diffs)
all_mgas_diffs.extend(mgas_diffs)
@@ -175,10 +175,10 @@ def compute_paired_stats(
# Bootstrap CI on difference-of-percentiles (resample paired blocks)
base_lats = sorted([p[0] for p in all_pairs])
branch_lats = sorted([p[1] for p in all_pairs])
p50_diff = percentile(branch_lats, 50) - percentile(base_lats, 50)
p90_diff = percentile(branch_lats, 90) - percentile(base_lats, 90)
p99_diff = percentile(branch_lats, 99) - percentile(base_lats, 99)
feature_lats = sorted([p[1] for p in all_pairs])
p50_diff = percentile(feature_lats, 50) - percentile(base_lats, 50)
p90_diff = percentile(feature_lats, 90) - percentile(base_lats, 90)
p99_diff = percentile(feature_lats, 99) - percentile(base_lats, 99)
rng = random.Random(42)
p50_boot, p90_boot, p99_boot = [], [], []
@@ -268,10 +268,11 @@ def generate_comparison_table(
paired: dict,
repo: str,
baseline_ref: str,
branch_name: str,
branch_sha: str,
baseline_name: str,
feature_name: str,
feature_sha: str,
) -> str:
"""Generate a markdown comparison table between baseline (main) and branch."""
"""Generate a markdown comparison table between baseline and feature."""
n = paired["n"]
def pct(base: float, feat: float) -> float:
@@ -294,11 +295,11 @@ def generate_comparison_table(
mgas_ci_pct = paired["mgas_ci"] / run1["mean_mgas_s"] * 100.0 if run1["mean_mgas_s"] > 0 else 0.0
base_url = f"https://github.com/{repo}/commit"
baseline_label = f"[`main`]({base_url}/{baseline_ref})"
branch_label = f"[`{branch_name}`]({base_url}/{branch_sha})"
baseline_label = f"[`{baseline_name}`]({base_url}/{baseline_ref})"
feature_label = f"[`{feature_name}`]({base_url}/{feature_sha})"
lines = [
f"| Metric | {baseline_label} | {branch_label} | Change |",
f"| Metric | {baseline_label} | {feature_label} | Change |",
"|--------|------|--------|--------|",
f"| Mean | {fmt_ms(run1['mean_ms'])} | {fmt_ms(run2['mean_ms'])} | {change_str(mean_pct, lat_ci_pct, lower_is_better=True)} |",
f"| StdDev | {fmt_ms(run1['stddev_ms'])} | {fmt_ms(run2['stddev_ms'])} | |",
@@ -314,15 +315,15 @@ def generate_comparison_table(
def generate_markdown(
summary: dict, comparison_table: str,
behind_main: int = 0, repo: str = "", baseline_ref: str = "",
behind_baseline: int = 0, repo: str = "", baseline_ref: str = "", baseline_name: str = "",
) -> str:
"""Generate a markdown comment body."""
lines = ["## Benchmark Results", "", comparison_table]
if behind_main > 0:
s = "s" if behind_main > 1 else ""
diff_link = f"https://github.com/{repo}/compare/{baseline_ref[:12]}...main"
if behind_baseline > 0:
s = "s" if behind_baseline > 1 else ""
diff_link = f"https://github.com/{repo}/compare/{baseline_ref[:12]}...{baseline_name}"
lines.append("")
lines.append(f"> ⚠️ Branch is [**{behind_main} commit{s} behind `main`**]({diff_link}). Consider rebasing for accurate results.")
lines.append(f"> ⚠️ Feature is [**{behind_baseline} commit{s} behind `{baseline_name}`**]({diff_link}). Consider rebasing for accurate results.")
return "\n".join(lines)
@@ -333,8 +334,8 @@ def main():
help="Baseline combined_latency.csv files (A1, A2)",
)
parser.add_argument(
"--branch-csv", nargs="+", required=True,
help="Branch combined_latency.csv files (B1, B2)",
"--feature-csv", "--branch-csv", nargs="+", required=True,
help="Feature combined_latency.csv files (B1, B2)",
)
parser.add_argument("--gas-csv", required=True, help="Path to total_gas.csv")
parser.add_argument(
@@ -345,65 +346,68 @@ def main():
"--repo", default="paradigmxyz/reth", help="GitHub repo (owner/name)"
)
parser.add_argument("--baseline-ref", default=None, help="Baseline commit SHA")
parser.add_argument("--branch-name", default=None, help="Branch name")
parser.add_argument("--branch-sha", default=None, help="Branch commit SHA")
parser.add_argument("--behind-main", type=int, default=0, help="Commits behind main")
parser.add_argument("--baseline-name", default=None, help="Baseline display name")
parser.add_argument("--feature-name", "--branch-name", default=None, help="Feature branch name")
parser.add_argument("--feature-ref", "--branch-sha", "--feature-sha", default=None, help="Feature commit SHA")
parser.add_argument("--behind-baseline", "--behind-main", type=int, default=0, help="Commits behind baseline")
args = parser.parse_args()
if len(args.baseline_csv) != len(args.branch_csv):
print("Must provide equal number of baseline and branch CSVs", file=sys.stderr)
if len(args.baseline_csv) != len(args.feature_csv):
print("Must provide equal number of baseline and feature CSVs", file=sys.stderr)
sys.exit(1)
baseline_runs = []
branch_runs = []
feature_runs = []
for path in args.baseline_csv:
data = parse_combined_csv(path)
if not data:
print(f"No results in {path}", file=sys.stderr)
sys.exit(1)
baseline_runs.append(data)
for path in args.branch_csv:
for path in args.feature_csv:
data = parse_combined_csv(path)
if not data:
print(f"No results in {path}", file=sys.stderr)
sys.exit(1)
branch_runs.append(data)
feature_runs.append(data)
gas = parse_gas_csv(args.gas_csv)
all_baseline = [r for run in baseline_runs for r in run]
all_branch = [r for run in branch_runs for r in run]
all_feature = [r for run in feature_runs for r in run]
summary = compute_summary(all_branch, gas)
summary = compute_summary(all_feature, gas)
with open(args.output_summary, "w") as f:
json.dump(summary, f, indent=2)
print(f"Summary written to {args.output_summary}")
baseline_stats = compute_stats(all_baseline)
branch_stats = compute_stats(all_branch)
paired_stats = compute_paired_stats(baseline_runs, branch_runs)
feature_stats = compute_stats(all_feature)
paired_stats = compute_paired_stats(baseline_runs, feature_runs)
if not paired_stats:
print("No common blocks between baseline and branch runs", file=sys.stderr)
print("No common blocks between baseline and feature runs", file=sys.stderr)
sys.exit(1)
comparison_table = generate_comparison_table(
baseline_stats,
branch_stats,
feature_stats,
paired_stats,
repo=args.repo,
baseline_ref=args.baseline_ref or "main",
branch_name=args.branch_name or "branch",
branch_sha=args.branch_sha or "unknown",
baseline_name=args.baseline_name or "baseline",
feature_name=args.feature_name or "feature",
feature_sha=args.feature_ref or "unknown",
)
print(f"Generated comparison ({paired_stats['n']} paired blocks, "
f"mean diff {paired_stats['mean_diff_ms']:+.3f}ms ± {paired_stats['ci_ms']:.3f}ms)")
markdown = generate_markdown(
summary, comparison_table,
behind_main=args.behind_main,
behind_baseline=args.behind_baseline,
repo=args.repo,
baseline_ref=args.baseline_ref or "",
baseline_name=args.baseline_name or "main",
)
with open(args.output_markdown, "w") as f:

View File

@@ -3,7 +3,7 @@
# The reth-bench job replays real blocks via the Engine API against a reth node
# backed by a local snapshot managed with schelk.
#
# It runs the main (baseline) binary and the branch (candidate) binary on the
# It runs the baseline binary and the feature (candidate) binary on the
# same block range (snapshot recovered between runs) to compare performance.
on:
@@ -108,8 +108,9 @@ jobs:
with:
script: |
const body = context.payload.comment.body.trim();
const known = new Set(['blocks', 'warmup']);
const defaults = { blocks: '500', warmup: '100' };
const intArgs = new Set(['blocks', 'warmup']);
const refArgs = new Set(['baseline', 'feature']);
const defaults = { blocks: '500', warmup: '100', baseline: '', feature: '' };
const unknown = [];
const invalid = [];
const args = body.replace(/^derek bench\s*/, '');
@@ -121,19 +122,27 @@ jobs:
}
const key = part.slice(0, eq);
const value = part.slice(eq + 1);
if (!known.has(key)) {
unknown.push(key);
} else if (!/^\d+$/.test(value)) {
invalid.push(`\`${key}=${value}\` (must be a positive integer)`);
if (intArgs.has(key)) {
if (!/^\d+$/.test(value)) {
invalid.push(`\`${key}=${value}\` (must be a positive integer)`);
} else {
defaults[key] = value;
}
} else if (refArgs.has(key)) {
if (!value) {
invalid.push(`\`${key}=\` (must be a git ref)`);
} else {
defaults[key] = value;
}
} else {
defaults[key] = value;
unknown.push(key);
}
}
const errors = [];
if (unknown.length) errors.push(`Unknown argument(s): \`${unknown.join('`, `')}\``);
if (invalid.length) errors.push(`Invalid value(s): ${invalid.join(', ')}`);
if (errors.length) {
const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`derek bench [blocks=N] [warmup=N]\``;
const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`derek bench [blocks=N] [warmup=N] [baseline=REF] [feature=REF]\``;
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
@@ -145,6 +154,8 @@ jobs:
}
core.setOutput('blocks', defaults.blocks);
core.setOutput('warmup', defaults.warmup);
core.setOutput('baseline', defaults.baseline);
core.setOutput('feature', defaults.feature);
core.exportVariable('BENCH_BLOCKS', defaults.blocks);
core.exportVariable('BENCH_WARMUP_BLOCKS', defaults.warmup);
@@ -163,11 +174,13 @@ jobs:
const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
const blocks = '${{ steps.args.outputs.blocks }}';
const warmup = '${{ steps.args.outputs.warmup }}';
const baseline = '${{ steps.args.outputs.baseline }}' || 'merge-base';
const feature = '${{ steps.args.outputs.feature }}' || 'PR head';
const { data: comment } = await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: `🚀 Benchmark started! [View run](${runUrl})\n\n⏳ **Status:** Building binaries...\n\n**Config:** ${blocks} blocks, ${warmup} warmup blocks`,
body: `🚀 Benchmark started! [View run](${runUrl})\n\n⏳ **Status:** Building binaries...\n\n**Config:** ${blocks} blocks, ${warmup} warmup blocks, baseline: \`${baseline}\`, feature: \`${feature}\``,
});
core.setOutput('comment-id', comment.id);
- uses: actions/checkout@v6
@@ -197,14 +210,52 @@ jobs:
echo "All dependencies found"
# Build binaries
- name: Fetch or build main binaries
- name: Resolve PR head branch
id: pr-info
uses: actions/github-script@v7
with:
script: |
const { data: pr } = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: context.issue.number,
});
core.setOutput('head-ref', pr.head.ref);
core.setOutput('head-sha', pr.head.sha);
- name: Resolve refs
id: refs
run: |
MERGE_BASE=$(git merge-base HEAD origin/main 2>/dev/null || echo "${{ github.sha }}")
.github/scripts/bench-reth-build.sh main "$MERGE_BASE"
- name: Fetch or build branch binaries
run: |
BRANCH_SHA="${{ github.sha }}"
.github/scripts/bench-reth-build.sh branch "$BRANCH_SHA"
BASELINE_ARG="${{ steps.args.outputs.baseline }}"
FEATURE_ARG="${{ steps.args.outputs.feature }}"
if [ -n "$BASELINE_ARG" ]; then
git fetch origin "$BASELINE_ARG" --quiet 2>/dev/null || true
BASELINE_REF=$(git rev-parse "$BASELINE_ARG" 2>/dev/null || git rev-parse "origin/$BASELINE_ARG" 2>/dev/null)
BASELINE_NAME="$BASELINE_ARG"
else
BASELINE_REF=$(git merge-base HEAD origin/main 2>/dev/null || echo "${{ github.sha }}")
BASELINE_NAME="main"
fi
if [ -n "$FEATURE_ARG" ]; then
git fetch origin "$FEATURE_ARG" --quiet 2>/dev/null || true
FEATURE_REF=$(git rev-parse "$FEATURE_ARG" 2>/dev/null || git rev-parse "origin/$FEATURE_ARG" 2>/dev/null)
FEATURE_NAME="$FEATURE_ARG"
else
FEATURE_REF="${{ steps.pr-info.outputs.head-sha }}"
FEATURE_NAME="${{ steps.pr-info.outputs.head-ref }}"
fi
echo "baseline-ref=$BASELINE_REF" >> "$GITHUB_OUTPUT"
echo "baseline-name=$BASELINE_NAME" >> "$GITHUB_OUTPUT"
echo "feature-ref=$FEATURE_REF" >> "$GITHUB_OUTPUT"
echo "feature-name=$FEATURE_NAME" >> "$GITHUB_OUTPUT"
- name: Fetch or build baseline binaries
run: .github/scripts/bench-reth-build.sh baseline "${{ steps.refs.outputs.baseline-ref }}"
- name: Fetch or build feature binaries
run: .github/scripts/bench-reth-build.sh feature "${{ steps.refs.outputs.feature-ref }}"
# System tuning for reproducible benchmarks
- name: System setup
@@ -267,38 +318,38 @@ jobs:
- name: "Run benchmark: baseline"
run: taskset -c 0 .github/scripts/bench-reth-run.sh baseline target/profiling-baseline/reth /tmp/bench-results-baseline
- name: "Run benchmark: branch"
run: taskset -c 0 .github/scripts/bench-reth-run.sh branch target/profiling/reth /tmp/bench-results-branch
- name: "Run benchmark: feature"
run: taskset -c 0 .github/scripts/bench-reth-run.sh feature target/profiling/reth /tmp/bench-results-feature
# Results & charts
- name: Parse results
id: results
if: success()
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
BRANCH_SHA: ${{ github.sha }}
BASELINE_REF: ${{ steps.refs.outputs.baseline-ref }}
BASELINE_NAME: ${{ steps.refs.outputs.baseline-name }}
FEATURE_NAME: ${{ steps.refs.outputs.feature-name }}
FEATURE_REF: ${{ steps.refs.outputs.feature-ref }}
run: |
git fetch origin main --quiet
# Use the actual PR head commit, not HEAD (which is the merge commit
# refs/pull/N/merge and always has origin/main as a parent).
MERGE_BASE=$(git merge-base "${BRANCH_SHA}" origin/main 2>/dev/null || echo "${{ github.sha }}")
MAIN_HEAD=$(git rev-parse origin/main 2>/dev/null || echo "")
BEHIND_MAIN=0
if [ -n "$MAIN_HEAD" ] && [ "$MERGE_BASE" != "$MAIN_HEAD" ]; then
BEHIND_MAIN=$(git rev-list --count "${MERGE_BASE}..${MAIN_HEAD}" 2>/dev/null || echo "0")
git fetch origin "${BASELINE_NAME}" --quiet 2>/dev/null || true
BASELINE_HEAD=$(git rev-parse "origin/${BASELINE_NAME}" 2>/dev/null || echo "")
BEHIND_BASELINE=0
if [ -n "$BASELINE_HEAD" ] && [ "$BASELINE_REF" != "$BASELINE_HEAD" ]; then
BEHIND_BASELINE=$(git rev-list --count "${BASELINE_REF}..${BASELINE_HEAD}" 2>/dev/null || echo "0")
fi
SUMMARY_ARGS="--output-summary /tmp/bench-summary.json"
SUMMARY_ARGS="$SUMMARY_ARGS --output-markdown /tmp/bench-comment.md"
SUMMARY_ARGS="$SUMMARY_ARGS --repo ${{ github.repository }}"
SUMMARY_ARGS="$SUMMARY_ARGS --baseline-ref ${MERGE_BASE}"
SUMMARY_ARGS="$SUMMARY_ARGS --branch-name ${BRANCH_NAME}"
SUMMARY_ARGS="$SUMMARY_ARGS --branch-sha ${BRANCH_SHA}"
SUMMARY_ARGS="$SUMMARY_ARGS --baseline-ref ${BASELINE_REF}"
SUMMARY_ARGS="$SUMMARY_ARGS --baseline-name ${BASELINE_NAME}"
SUMMARY_ARGS="$SUMMARY_ARGS --feature-name ${FEATURE_NAME}"
SUMMARY_ARGS="$SUMMARY_ARGS --feature-ref ${FEATURE_REF}"
SUMMARY_ARGS="$SUMMARY_ARGS --baseline-csv /tmp/bench-results-baseline/combined_latency.csv"
SUMMARY_ARGS="$SUMMARY_ARGS --branch-csv /tmp/bench-results-branch/combined_latency.csv"
SUMMARY_ARGS="$SUMMARY_ARGS --gas-csv /tmp/bench-results-branch/total_gas.csv"
if [ "$BEHIND_MAIN" -gt 0 ]; then
SUMMARY_ARGS="$SUMMARY_ARGS --behind-main $BEHIND_MAIN"
SUMMARY_ARGS="$SUMMARY_ARGS --feature-csv /tmp/bench-results-feature/combined_latency.csv"
SUMMARY_ARGS="$SUMMARY_ARGS --gas-csv /tmp/bench-results-feature/total_gas.csv"
if [ "$BEHIND_BASELINE" -gt 0 ]; then
SUMMARY_ARGS="$SUMMARY_ARGS --behind-baseline $BEHIND_BASELINE"
fi
# shellcheck disable=SC2086
python3 .github/scripts/bench-reth-summary.py $SUMMARY_ARGS
@@ -306,11 +357,13 @@ jobs:
- name: Generate charts
if: success()
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
BASELINE_NAME: ${{ steps.refs.outputs.baseline-name }}
FEATURE_NAME: ${{ steps.refs.outputs.feature-name }}
run: |
CHART_ARGS="/tmp/bench-results-branch/combined_latency.csv --output-dir /tmp/bench-charts"
CHART_ARGS="/tmp/bench-results-feature/combined_latency.csv --output-dir /tmp/bench-charts"
CHART_ARGS="$CHART_ARGS --baseline /tmp/bench-results-baseline/combined_latency.csv"
CHART_ARGS="$CHART_ARGS --branch-name ${BRANCH_NAME}"
CHART_ARGS="$CHART_ARGS --baseline-name ${BASELINE_NAME}"
CHART_ARGS="$CHART_ARGS --feature-name ${FEATURE_NAME}"
# shellcheck disable=SC2086
uv run --with matplotlib python3 .github/scripts/bench-reth-charts.py $CHART_ARGS
@@ -321,7 +374,7 @@ jobs:
name: bench-reth-results
path: |
/tmp/bench-results-baseline/
/tmp/bench-results-branch/
/tmp/bench-results-feature/
/tmp/bench-summary.json
/tmp/bench-charts/
@@ -410,7 +463,7 @@ jobs:
name: reth-node-log
path: |
/tmp/reth-bench-node-baseline.log
/tmp/reth-bench-node-branch.log
/tmp/reth-bench-node-feature.log
- name: Restore system settings
if: always()