From b49cadb346522bf70e133917535f4cea494aeca9 Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Tue, 17 Feb 2026 23:00:01 +0000 Subject: [PATCH] ci(bench): rename main/branch to baseline/feature, add ref args (#22284) Co-authored-by: Georgios Konstantopoulos Co-authored-by: Amp --- .github/scripts/bench-reth-build.sh | 22 ++--- .github/scripts/bench-reth-charts.py | 24 ++--- .github/scripts/bench-reth-summary.py | 104 ++++++++++---------- .github/workflows/bench.yml | 135 ++++++++++++++++++-------- 4 files changed, 171 insertions(+), 114 deletions(-) diff --git a/.github/scripts/bench-reth-build.sh b/.github/scripts/bench-reth-build.sh index 2d0046caad..5850e6ff79 100755 --- a/.github/scripts/bench-reth-build.sh +++ b/.github/scripts/bench-reth-build.sh @@ -2,15 +2,15 @@ # # Builds (or fetches from cache) reth binaries for benchmarking. # -# Usage: bench-reth-build.sh <main|branch> <commit> [branch-sha] +# Usage: bench-reth-build.sh <baseline|feature> <commit> [branch-sha] # -# main — build/fetch the baseline binary at <commit> (merge-base) -# branch — build/fetch the candidate binary + reth-bench at <commit> -# optional branch-sha is the PR head commit for cache key +# baseline — build/fetch the baseline binary at <commit> (merge-base) +# feature — build/fetch the candidate binary + reth-bench at <commit> +# optional branch-sha is the PR head commit for cache key # # Outputs: -# main: target/profiling-baseline/reth -# branch: target/profiling/reth, reth-bench installed to cargo bin +# baseline: target/profiling-baseline/reth +# feature: target/profiling/reth, reth-bench installed to cargo bin # # Required: mc (MinIO client) configured at /home/ubuntu/.mc set -euo pipefail @@ -20,16 +20,16 @@ MODE="$1" COMMIT="$2" case "$MODE" in - main) + baseline|main) BUCKET="minio/reth-binaries/${COMMIT}" mkdir -p target/profiling-baseline if $MC stat "${BUCKET}/reth" &>/dev/null; then - echo "Cache hit for main (${COMMIT}), downloading binary..." + echo "Cache hit for baseline (${COMMIT}), downloading binary..."
$MC cp "${BUCKET}/reth" target/profiling-baseline/reth chmod +x target/profiling-baseline/reth else - echo "Cache miss for main (${COMMIT}), building from source..." + echo "Cache miss for baseline (${COMMIT}), building from source..." CURRENT_REF=$(git rev-parse HEAD) git checkout "${COMMIT}" cargo build --profile profiling --bin reth @@ -39,7 +39,7 @@ case "$MODE" in fi ;; - branch) + feature|branch) BRANCH_SHA="${3:-$COMMIT}" BUCKET="minio/reth-binaries/${BRANCH_SHA}" @@ -60,7 +60,7 @@ case "$MODE" in ;; *) - echo "Usage: $0 <main|branch> <commit> [branch-sha]" + echo "Usage: $0 <baseline|feature> <commit> [branch-sha]" exit 1 ;; esac diff --git a/.github/scripts/bench-reth-charts.py b/.github/scripts/bench-reth-charts.py index bc9d244832..b0c114a470 100644 --- a/.github/scripts/bench-reth-charts.py +++ b/.github/scripts/bench-reth-charts.py @@ -53,7 +53,7 @@ def parse_combined_csv(path: str) -> list[dict]: def plot_latency_and_throughput( feature: list[dict], baseline: list[dict] | None, out: Path, - baseline_name: str = "main", branch_name: str = "branch", + baseline_name: str = "baseline", feature_name: str = "feature", ): num_plots = 3 if baseline else 2 fig, axes = plt.subplots(num_plots, 1, figsize=(12, 4 * num_plots), sharex=True) @@ -76,14 +76,14 @@ def plot_latency_and_throughput( ax1.plot(base_x, base_lat, linewidth=0.8, label=baseline_name, alpha=0.7) ax2.plot(base_x, base_ggas, linewidth=0.8, label=baseline_name, alpha=0.7) - ax1.plot(feat_x, feat_lat, linewidth=0.8, label=branch_name) + ax1.plot(feat_x, feat_lat, linewidth=0.8, label=feature_name) ax1.set_ylabel("Latency (ms)") ax1.set_title("newPayload Latency per Block") ax1.grid(True, alpha=0.3) if baseline: ax1.legend() - ax2.plot(feat_x, feat_ggas, linewidth=0.8, label=branch_name) + ax2.plot(feat_x, feat_ggas, linewidth=0.8, label=feature_name) ax2.set_ylabel("Ggas/s") ax2.set_title("Execution Throughput per Block") ax2.grid(True, alpha=0.3) @@ -105,7 +105,7 @@ def plot_latency_and_throughput( ax3.bar(blocks, diffs, width=1.0, color=colors,
alpha=0.7, edgecolor="none") ax3.axhline(0, color="black", linewidth=0.5) ax3.set_ylabel("Δ Latency (%)") - ax3.set_title("Per-Block newPayload Latency Change (branch vs main)") + ax3.set_title("Per-Block newPayload Latency Change (feature vs baseline)") ax3.grid(True, alpha=0.3, axis="y") axes[-1].set_xlabel("Block Number") @@ -116,7 +116,7 @@ def plot_latency_and_throughput( def plot_wait_breakdown( feature: list[dict], baseline: list[dict] | None, out: Path, - baseline_name: str = "main", branch_name: str = "branch", + baseline_name: str = "baseline", feature_name: str = "feature", ): series = [ ("Persistence Wait", "persistence_wait_us"), @@ -135,7 +135,7 @@ def plot_wait_breakdown( fx = [r["block_number"] for r in feature if r[key] is not None] fy = [r[key] / 1_000 for r in feature if r[key] is not None] if fx: - ax.plot(fx, fy, linewidth=0.8, label=branch_name) + ax.plot(fx, fy, linewidth=0.8, label=feature_name) ax.set_ylabel("ms") ax.set_title(label) @@ -163,7 +163,7 @@ def _add_regression(ax, x, y, color, label): def plot_gas_vs_latency( feature: list[dict], baseline: list[dict] | None, out: Path, - baseline_name: str = "main", branch_name: str = "branch", + baseline_name: str = "baseline", feature_name: str = "feature", ): fig, ax = plt.subplots(figsize=(8, 6)) @@ -176,7 +176,7 @@ def plot_gas_vs_latency( fgas = [r["gas_used"] / 1_000_000 for r in feature] flat = [r["new_payload_latency_us"] / 1_000 for r in feature] ax.scatter(fgas, flat, s=8, alpha=0.6) - _add_regression(ax, fgas, flat, "tab:orange", branch_name) + _add_regression(ax, fgas, flat, "tab:orange", feature_name) ax.set_xlabel("Gas Used (Mgas)") ax.set_ylabel("newPayload Latency (ms)") @@ -195,10 +195,10 @@ def main(): "--output-dir", required=True, help="Output directory for PNG charts" ) parser.add_argument( - "--baseline", help="Path to baseline (main) combined_latency.csv" + "--baseline", help="Path to baseline combined_latency.csv" ) - parser.add_argument("--baseline-name", 
default="main", help="Label for baseline") - parser.add_argument("--branch-name", default="branch", help="Label for branch") + parser.add_argument("--baseline-name", default="baseline", help="Label for baseline") + parser.add_argument("--feature-name", "--branch-name", default="feature", help="Label for feature") args = parser.parse_args() feature = parse_combined_csv(args.combined_csv) @@ -220,7 +220,7 @@ def main(): out_dir.mkdir(parents=True, exist_ok=True) bname = args.baseline_name - fname = args.branch_name + fname = args.feature_name plot_latency_and_throughput(feature, baseline, out_dir / "latency_throughput.png", bname, fname) plot_wait_breakdown(feature, baseline, out_dir / "wait_breakdown.png", bname, fname) plot_gas_vs_latency(feature, baseline, out_dir / "gas_vs_latency.png", bname, fname) diff --git a/.github/scripts/bench-reth-summary.py b/.github/scripts/bench-reth-summary.py index 7088350feb..14369f7166 100755 --- a/.github/scripts/bench-reth-summary.py +++ b/.github/scripts/bench-reth-summary.py @@ -8,12 +8,12 @@ Usage: --baseline-csv \ [--repo ] \ [--baseline-ref ] \ - [--branch-name ] \ - [--branch-sha ] + [--feature-name ] \ + [--feature-sha ] -Generates a paired statistical comparison between baseline (main) and branch. +Generates a paired statistical comparison between baseline and feature. Matches blocks by number and computes per-block diffs to cancel out gas -variance. Fails if baseline or branch CSV is missing or empty. +variance. Fails if baseline or feature CSV is missing or empty. """ import argparse @@ -113,25 +113,25 @@ def compute_stats(combined: list[dict]) -> dict: def _paired_data( - baseline: list[dict], branch: list[dict] + baseline: list[dict], feature: list[dict] ) -> tuple[list[tuple[float, float]], list[float], list[float]]: """Match blocks and return paired latencies and per-block diffs. 
Returns: - pairs: list of (baseline_ms, branch_ms) tuples - lat_diffs_ms: list of branch − baseline latency diffs in ms - mgas_diffs: list of branch − baseline Mgas/s diffs + pairs: list of (baseline_ms, feature_ms) tuples + lat_diffs_ms: list of feature − baseline latency diffs in ms + mgas_diffs: list of feature − baseline Mgas/s diffs """ baseline_by_block = {r["block_number"]: r for r in baseline} - branch_by_block = {r["block_number"]: r for r in branch} - common_blocks = sorted(set(baseline_by_block) & set(branch_by_block)) + feature_by_block = {r["block_number"]: r for r in feature} + common_blocks = sorted(set(baseline_by_block) & set(feature_by_block)) pairs = [] lat_diffs_ms = [] mgas_diffs = [] for bn in common_blocks: b = baseline_by_block[bn] - f = branch_by_block[bn] + f = feature_by_block[bn] b_ms = b["new_payload_latency_us"] / 1_000 f_ms = f["new_payload_latency_us"] / 1_000 pairs.append((b_ms, f_ms)) @@ -148,18 +148,18 @@ def _paired_data( def compute_paired_stats( baseline_runs: list[list[dict]], - branch_runs: list[list[dict]], + feature_runs: list[list[dict]], ) -> dict: - """Compute paired statistics between baseline and branch runs. + """Compute paired statistics between baseline and feature runs. - Each pair (baseline_runs[i], branch_runs[i]) produces per-block diffs. + Each pair (baseline_runs[i], feature_runs[i]) produces per-block diffs. All diffs are pooled for the final CI. 
""" all_pairs = [] all_lat_diffs = [] all_mgas_diffs = [] - for baseline, branch in zip(baseline_runs, branch_runs): - pairs, lat_diffs, mgas_diffs = _paired_data(baseline, branch) + for baseline, feature in zip(baseline_runs, feature_runs): + pairs, lat_diffs, mgas_diffs = _paired_data(baseline, feature) all_pairs.extend(pairs) all_lat_diffs.extend(lat_diffs) all_mgas_diffs.extend(mgas_diffs) @@ -175,10 +175,10 @@ def compute_paired_stats( # Bootstrap CI on difference-of-percentiles (resample paired blocks) base_lats = sorted([p[0] for p in all_pairs]) - branch_lats = sorted([p[1] for p in all_pairs]) - p50_diff = percentile(branch_lats, 50) - percentile(base_lats, 50) - p90_diff = percentile(branch_lats, 90) - percentile(base_lats, 90) - p99_diff = percentile(branch_lats, 99) - percentile(base_lats, 99) + feature_lats = sorted([p[1] for p in all_pairs]) + p50_diff = percentile(feature_lats, 50) - percentile(base_lats, 50) + p90_diff = percentile(feature_lats, 90) - percentile(base_lats, 90) + p99_diff = percentile(feature_lats, 99) - percentile(base_lats, 99) rng = random.Random(42) p50_boot, p90_boot, p99_boot = [], [], [] @@ -268,10 +268,11 @@ def generate_comparison_table( paired: dict, repo: str, baseline_ref: str, - branch_name: str, - branch_sha: str, + baseline_name: str, + feature_name: str, + feature_sha: str, ) -> str: - """Generate a markdown comparison table between baseline (main) and branch.""" + """Generate a markdown comparison table between baseline and feature.""" n = paired["n"] def pct(base: float, feat: float) -> float: @@ -294,11 +295,11 @@ def generate_comparison_table( mgas_ci_pct = paired["mgas_ci"] / run1["mean_mgas_s"] * 100.0 if run1["mean_mgas_s"] > 0 else 0.0 base_url = f"https://github.com/{repo}/commit" - baseline_label = f"[`main`]({base_url}/{baseline_ref})" - branch_label = f"[`{branch_name}`]({base_url}/{branch_sha})" + baseline_label = f"[`{baseline_name}`]({base_url}/{baseline_ref})" + feature_label = 
f"[`{feature_name}`]({base_url}/{feature_sha})" lines = [ - f"| Metric | {baseline_label} | {branch_label} | Change |", + f"| Metric | {baseline_label} | {feature_label} | Change |", "|--------|------|--------|--------|", f"| Mean | {fmt_ms(run1['mean_ms'])} | {fmt_ms(run2['mean_ms'])} | {change_str(mean_pct, lat_ci_pct, lower_is_better=True)} |", f"| StdDev | {fmt_ms(run1['stddev_ms'])} | {fmt_ms(run2['stddev_ms'])} | |", @@ -314,15 +315,15 @@ def generate_comparison_table( def generate_markdown( summary: dict, comparison_table: str, - behind_main: int = 0, repo: str = "", baseline_ref: str = "", + behind_baseline: int = 0, repo: str = "", baseline_ref: str = "", baseline_name: str = "", ) -> str: """Generate a markdown comment body.""" lines = ["## Benchmark Results", "", comparison_table] - if behind_main > 0: - s = "s" if behind_main > 1 else "" - diff_link = f"https://github.com/{repo}/compare/{baseline_ref[:12]}...main" + if behind_baseline > 0: + s = "s" if behind_baseline > 1 else "" + diff_link = f"https://github.com/{repo}/compare/{baseline_ref[:12]}...{baseline_name}" lines.append("") - lines.append(f"> ⚠️ Branch is [**{behind_main} commit{s} behind `main`**]({diff_link}). Consider rebasing for accurate results.") + lines.append(f"> ⚠️ Feature is [**{behind_baseline} commit{s} behind `{baseline_name}`**]({diff_link}). 
Consider rebasing for accurate results.") return "\n".join(lines) @@ -333,8 +334,8 @@ def main(): help="Baseline combined_latency.csv files (A1, A2)", ) parser.add_argument( - "--branch-csv", nargs="+", required=True, - help="Branch combined_latency.csv files (B1, B2)", + "--feature-csv", "--branch-csv", nargs="+", required=True, + help="Feature combined_latency.csv files (B1, B2)", ) parser.add_argument("--gas-csv", required=True, help="Path to total_gas.csv") parser.add_argument( @@ -345,65 +346,68 @@ def main(): "--repo", default="paradigmxyz/reth", help="GitHub repo (owner/name)" ) parser.add_argument("--baseline-ref", default=None, help="Baseline commit SHA") - parser.add_argument("--branch-name", default=None, help="Branch name") - parser.add_argument("--branch-sha", default=None, help="Branch commit SHA") - parser.add_argument("--behind-main", type=int, default=0, help="Commits behind main") + parser.add_argument("--baseline-name", default=None, help="Baseline display name") + parser.add_argument("--feature-name", "--branch-name", default=None, help="Feature branch name") + parser.add_argument("--feature-ref", "--branch-sha", "--feature-sha", default=None, help="Feature commit SHA") + parser.add_argument("--behind-baseline", "--behind-main", type=int, default=0, help="Commits behind baseline") args = parser.parse_args() - if len(args.baseline_csv) != len(args.branch_csv): - print("Must provide equal number of baseline and branch CSVs", file=sys.stderr) + if len(args.baseline_csv) != len(args.feature_csv): + print("Must provide equal number of baseline and feature CSVs", file=sys.stderr) sys.exit(1) baseline_runs = [] - branch_runs = [] + feature_runs = [] for path in args.baseline_csv: data = parse_combined_csv(path) if not data: print(f"No results in {path}", file=sys.stderr) sys.exit(1) baseline_runs.append(data) - for path in args.branch_csv: + for path in args.feature_csv: data = parse_combined_csv(path) if not data: print(f"No results in {path}", 
file=sys.stderr) sys.exit(1) - branch_runs.append(data) + feature_runs.append(data) gas = parse_gas_csv(args.gas_csv) all_baseline = [r for run in baseline_runs for r in run] - all_branch = [r for run in branch_runs for r in run] + all_feature = [r for run in feature_runs for r in run] - summary = compute_summary(all_branch, gas) + summary = compute_summary(all_feature, gas) with open(args.output_summary, "w") as f: json.dump(summary, f, indent=2) print(f"Summary written to {args.output_summary}") baseline_stats = compute_stats(all_baseline) - branch_stats = compute_stats(all_branch) - paired_stats = compute_paired_stats(baseline_runs, branch_runs) + feature_stats = compute_stats(all_feature) + paired_stats = compute_paired_stats(baseline_runs, feature_runs) if not paired_stats: - print("No common blocks between baseline and branch runs", file=sys.stderr) + print("No common blocks between baseline and feature runs", file=sys.stderr) sys.exit(1) comparison_table = generate_comparison_table( baseline_stats, - branch_stats, + feature_stats, paired_stats, repo=args.repo, baseline_ref=args.baseline_ref or "main", - branch_name=args.branch_name or "branch", - branch_sha=args.branch_sha or "unknown", + baseline_name=args.baseline_name or "baseline", + feature_name=args.feature_name or "feature", + feature_sha=args.feature_ref or "unknown", ) print(f"Generated comparison ({paired_stats['n']} paired blocks, " f"mean diff {paired_stats['mean_diff_ms']:+.3f}ms ± {paired_stats['ci_ms']:.3f}ms)") markdown = generate_markdown( summary, comparison_table, - behind_main=args.behind_main, + behind_baseline=args.behind_baseline, repo=args.repo, baseline_ref=args.baseline_ref or "", + baseline_name=args.baseline_name or "main", ) with open(args.output_markdown, "w") as f: diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 335b4e9489..08d1c50181 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -3,7 +3,7 @@ # The reth-bench job 
replays real blocks via the Engine API against a reth node # backed by a local snapshot managed with schelk. # -# It runs the main (baseline) binary and the branch (candidate) binary on the +# It runs the baseline binary and the feature (candidate) binary on the # same block range (snapshot recovered between runs) to compare performance. on: @@ -108,8 +108,9 @@ jobs: with: script: | const body = context.payload.comment.body.trim(); - const known = new Set(['blocks', 'warmup']); - const defaults = { blocks: '500', warmup: '100' }; + const intArgs = new Set(['blocks', 'warmup']); + const refArgs = new Set(['baseline', 'feature']); + const defaults = { blocks: '500', warmup: '100', baseline: '', feature: '' }; const unknown = []; const invalid = []; const args = body.replace(/^derek bench\s*/, ''); @@ -121,19 +122,27 @@ jobs: } const key = part.slice(0, eq); const value = part.slice(eq + 1); - if (!known.has(key)) { - unknown.push(key); - } else if (!/^\d+$/.test(value)) { - invalid.push(`\`${key}=${value}\` (must be a positive integer)`); + if (intArgs.has(key)) { + if (!/^\d+$/.test(value)) { + invalid.push(`\`${key}=${value}\` (must be a positive integer)`); + } else { + defaults[key] = value; + } + } else if (refArgs.has(key)) { + if (!value) { + invalid.push(`\`${key}=\` (must be a git ref)`); + } else { + defaults[key] = value; + } } else { - defaults[key] = value; + unknown.push(key); } } const errors = []; if (unknown.length) errors.push(`Unknown argument(s): \`${unknown.join('`, `')}\``); if (invalid.length) errors.push(`Invalid value(s): ${invalid.join(', ')}`); if (errors.length) { - const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`derek bench [blocks=N] [warmup=N]\``; + const msg = `❌ **Invalid bench command**\n\n${errors.join('\n')}\n\n**Usage:** \`derek bench [blocks=N] [warmup=N] [baseline=REF] [feature=REF]\``; await github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, @@ -145,6 +154,8 
@@ jobs: } core.setOutput('blocks', defaults.blocks); core.setOutput('warmup', defaults.warmup); + core.setOutput('baseline', defaults.baseline); + core.setOutput('feature', defaults.feature); core.exportVariable('BENCH_BLOCKS', defaults.blocks); core.exportVariable('BENCH_WARMUP_BLOCKS', defaults.warmup); @@ -163,11 +174,13 @@ jobs: const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; const blocks = '${{ steps.args.outputs.blocks }}'; const warmup = '${{ steps.args.outputs.warmup }}'; + const baseline = '${{ steps.args.outputs.baseline }}' || 'merge-base'; + const feature = '${{ steps.args.outputs.feature }}' || 'PR head'; const { data: comment } = await github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, - body: `🚀 Benchmark started! [View run](${runUrl})\n\n⏳ **Status:** Building binaries...\n\n**Config:** ${blocks} blocks, ${warmup} warmup blocks`, + body: `🚀 Benchmark started! 
[View run](${runUrl})\n\n⏳ **Status:** Building binaries...\n\n**Config:** ${blocks} blocks, ${warmup} warmup blocks, baseline: \`${baseline}\`, feature: \`${feature}\``, }); core.setOutput('comment-id', comment.id); - uses: actions/checkout@v6 @@ -197,14 +210,52 @@ jobs: echo "All dependencies found" # Build binaries - - name: Fetch or build main binaries + - name: Resolve PR head branch + id: pr-info + uses: actions/github-script@v7 + with: + script: | + const { data: pr } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number, + }); + core.setOutput('head-ref', pr.head.ref); + core.setOutput('head-sha', pr.head.sha); + + - name: Resolve refs + id: refs run: | - MERGE_BASE=$(git merge-base HEAD origin/main 2>/dev/null || echo "${{ github.sha }}") - .github/scripts/bench-reth-build.sh main "$MERGE_BASE" - - name: Fetch or build branch binaries - run: | - BRANCH_SHA="${{ github.sha }}" - .github/scripts/bench-reth-build.sh branch "$BRANCH_SHA" + BASELINE_ARG="${{ steps.args.outputs.baseline }}" + FEATURE_ARG="${{ steps.args.outputs.feature }}" + + if [ -n "$BASELINE_ARG" ]; then + git fetch origin "$BASELINE_ARG" --quiet 2>/dev/null || true + BASELINE_REF=$(git rev-parse "$BASELINE_ARG" 2>/dev/null || git rev-parse "origin/$BASELINE_ARG" 2>/dev/null) + BASELINE_NAME="$BASELINE_ARG" + else + BASELINE_REF=$(git merge-base HEAD origin/main 2>/dev/null || echo "${{ github.sha }}") + BASELINE_NAME="main" + fi + + if [ -n "$FEATURE_ARG" ]; then + git fetch origin "$FEATURE_ARG" --quiet 2>/dev/null || true + FEATURE_REF=$(git rev-parse "$FEATURE_ARG" 2>/dev/null || git rev-parse "origin/$FEATURE_ARG" 2>/dev/null) + FEATURE_NAME="$FEATURE_ARG" + else + FEATURE_REF="${{ steps.pr-info.outputs.head-sha }}" + FEATURE_NAME="${{ steps.pr-info.outputs.head-ref }}" + fi + + echo "baseline-ref=$BASELINE_REF" >> "$GITHUB_OUTPUT" + echo "baseline-name=$BASELINE_NAME" >> "$GITHUB_OUTPUT" + echo 
"feature-ref=$FEATURE_REF" >> "$GITHUB_OUTPUT" + echo "feature-name=$FEATURE_NAME" >> "$GITHUB_OUTPUT" + + - name: Fetch or build baseline binaries + run: .github/scripts/bench-reth-build.sh baseline "${{ steps.refs.outputs.baseline-ref }}" + - name: Fetch or build feature binaries + run: .github/scripts/bench-reth-build.sh feature "${{ steps.refs.outputs.feature-ref }}" # System tuning for reproducible benchmarks - name: System setup @@ -267,38 +318,38 @@ jobs: - name: "Run benchmark: baseline" run: taskset -c 0 .github/scripts/bench-reth-run.sh baseline target/profiling-baseline/reth /tmp/bench-results-baseline - - name: "Run benchmark: branch" - run: taskset -c 0 .github/scripts/bench-reth-run.sh branch target/profiling/reth /tmp/bench-results-branch + - name: "Run benchmark: feature" + run: taskset -c 0 .github/scripts/bench-reth-run.sh feature target/profiling/reth /tmp/bench-results-feature # Results & charts - name: Parse results id: results if: success() env: - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - BRANCH_SHA: ${{ github.sha }} + BASELINE_REF: ${{ steps.refs.outputs.baseline-ref }} + BASELINE_NAME: ${{ steps.refs.outputs.baseline-name }} + FEATURE_NAME: ${{ steps.refs.outputs.feature-name }} + FEATURE_REF: ${{ steps.refs.outputs.feature-ref }} run: | - git fetch origin main --quiet - # Use the actual PR head commit, not HEAD (which is the merge commit - # refs/pull/N/merge and always has origin/main as a parent). 
- MERGE_BASE=$(git merge-base "${BRANCH_SHA}" origin/main 2>/dev/null || echo "${{ github.sha }}") - MAIN_HEAD=$(git rev-parse origin/main 2>/dev/null || echo "") - BEHIND_MAIN=0 - if [ -n "$MAIN_HEAD" ] && [ "$MERGE_BASE" != "$MAIN_HEAD" ]; then - BEHIND_MAIN=$(git rev-list --count "${MERGE_BASE}..${MAIN_HEAD}" 2>/dev/null || echo "0") + git fetch origin "${BASELINE_NAME}" --quiet 2>/dev/null || true + BASELINE_HEAD=$(git rev-parse "origin/${BASELINE_NAME}" 2>/dev/null || echo "") + BEHIND_BASELINE=0 + if [ -n "$BASELINE_HEAD" ] && [ "$BASELINE_REF" != "$BASELINE_HEAD" ]; then + BEHIND_BASELINE=$(git rev-list --count "${BASELINE_REF}..${BASELINE_HEAD}" 2>/dev/null || echo "0") fi SUMMARY_ARGS="--output-summary /tmp/bench-summary.json" SUMMARY_ARGS="$SUMMARY_ARGS --output-markdown /tmp/bench-comment.md" SUMMARY_ARGS="$SUMMARY_ARGS --repo ${{ github.repository }}" - SUMMARY_ARGS="$SUMMARY_ARGS --baseline-ref ${MERGE_BASE}" - SUMMARY_ARGS="$SUMMARY_ARGS --branch-name ${BRANCH_NAME}" - SUMMARY_ARGS="$SUMMARY_ARGS --branch-sha ${BRANCH_SHA}" + SUMMARY_ARGS="$SUMMARY_ARGS --baseline-ref ${BASELINE_REF}" + SUMMARY_ARGS="$SUMMARY_ARGS --baseline-name ${BASELINE_NAME}" + SUMMARY_ARGS="$SUMMARY_ARGS --feature-name ${FEATURE_NAME}" + SUMMARY_ARGS="$SUMMARY_ARGS --feature-ref ${FEATURE_REF}" SUMMARY_ARGS="$SUMMARY_ARGS --baseline-csv /tmp/bench-results-baseline/combined_latency.csv" - SUMMARY_ARGS="$SUMMARY_ARGS --branch-csv /tmp/bench-results-branch/combined_latency.csv" - SUMMARY_ARGS="$SUMMARY_ARGS --gas-csv /tmp/bench-results-branch/total_gas.csv" - if [ "$BEHIND_MAIN" -gt 0 ]; then - SUMMARY_ARGS="$SUMMARY_ARGS --behind-main $BEHIND_MAIN" + SUMMARY_ARGS="$SUMMARY_ARGS --feature-csv /tmp/bench-results-feature/combined_latency.csv" + SUMMARY_ARGS="$SUMMARY_ARGS --gas-csv /tmp/bench-results-feature/total_gas.csv" + if [ "$BEHIND_BASELINE" -gt 0 ]; then + SUMMARY_ARGS="$SUMMARY_ARGS --behind-baseline $BEHIND_BASELINE" fi # shellcheck disable=SC2086 python3 
.github/scripts/bench-reth-summary.py $SUMMARY_ARGS @@ -306,11 +357,13 @@ jobs: - name: Generate charts if: success() env: - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + BASELINE_NAME: ${{ steps.refs.outputs.baseline-name }} + FEATURE_NAME: ${{ steps.refs.outputs.feature-name }} run: | - CHART_ARGS="/tmp/bench-results-branch/combined_latency.csv --output-dir /tmp/bench-charts" + CHART_ARGS="/tmp/bench-results-feature/combined_latency.csv --output-dir /tmp/bench-charts" CHART_ARGS="$CHART_ARGS --baseline /tmp/bench-results-baseline/combined_latency.csv" - CHART_ARGS="$CHART_ARGS --branch-name ${BRANCH_NAME}" + CHART_ARGS="$CHART_ARGS --baseline-name ${BASELINE_NAME}" + CHART_ARGS="$CHART_ARGS --feature-name ${FEATURE_NAME}" # shellcheck disable=SC2086 uv run --with matplotlib python3 .github/scripts/bench-reth-charts.py $CHART_ARGS @@ -321,7 +374,7 @@ jobs: name: bench-reth-results path: | /tmp/bench-results-baseline/ - /tmp/bench-results-branch/ + /tmp/bench-results-feature/ /tmp/bench-summary.json /tmp/bench-charts/ @@ -410,7 +463,7 @@ jobs: name: reth-node-log path: | /tmp/reth-bench-node-baseline.log - /tmp/reth-bench-node-branch.log + /tmp/reth-bench-node-feature.log - name: Restore system settings if: always()