From acdbd065e2a583d4c2908bc5543215203e370553 Mon Sep 17 00:00:00 2001
From: Derek Cofausper <256792747+decofe@users.noreply.github.com>
Date: Mon, 16 Mar 2026 03:12:35 -0700
Subject: [PATCH] chore(bench): add rich job summary matching Slack output
 (#23046)

Co-authored-by: Sergei Shulepov <2205845+pepyakin@users.noreply.github.com>
---
 .github/scripts/bench-job-summary.js  | 106 ++++++++++++++++++++++++++
 .github/scripts/bench-slack-notify.js | 104 +++++++------------------
 .github/scripts/bench-utils.js        |  96 +++++++++++++++++++++++
 .github/workflows/bench.yml           |  17 ++++-
 4 files changed, 243 insertions(+), 80 deletions(-)
 create mode 100644 .github/scripts/bench-job-summary.js
 create mode 100644 .github/scripts/bench-utils.js

diff --git a/.github/scripts/bench-job-summary.js b/.github/scripts/bench-job-summary.js
new file mode 100644
index 0000000000..21caf88c7d
--- /dev/null
+++ b/.github/scripts/bench-job-summary.js
@@ -0,0 +1,106 @@
+// Generates a rich GitHub Actions job summary for reth-bench results.
+//
+// Reads from environment:
+//   BENCH_WORK_DIR       – Directory containing summary.json
+//   BENCH_PR             – PR number (may be empty)
+//   BENCH_ACTOR          – GitHub user who triggered the bench
+//   BENCH_CORES          – CPU core limit (0 = all)
+//   BENCH_WARMUP_BLOCKS  – Number of warmup blocks
+//   BENCH_SAMPLY         – 'true' if samply profiling was enabled
+//   BENCH_ABBA           – 'true' if ABBA interleaved order was used
+//
+// Usage from actions/github-script:
+//   const jobSummary = require('./.github/scripts/bench-job-summary.js');
+//   await jobSummary({ core, context, chartSha, grafanaUrl, runId });
+
+const fs = require('fs');
+const { verdict, loadSamplyUrls, blocksLabel, metricRows, waitTimeRows } = require('./bench-utils');
+
+module.exports = async function ({ core, context, chartSha, grafanaUrl, runId }) {
+  let summary;
+  try {
+    summary = JSON.parse(fs.readFileSync(process.env.BENCH_WORK_DIR + '/summary.json', 'utf8'));
+  } catch (e) {
+    await core.summary.addRaw('⚠️ Benchmark completed but failed to load summary.').write();
+    return;
+  }
+
+  const repo = `${context.repo.owner}/${context.repo.repo}`;
+  const prNumber = process.env.BENCH_PR;
+  const actor = process.env.BENCH_ACTOR;
+  const commitUrl = `https://github.com/${repo}/commit`;
+
+  const { emoji, label } = verdict(summary.changes);
+  const baselineLink = `[\`${summary.baseline.name}\`](${commitUrl}/${summary.baseline.ref})`;
+  const featureLink = `[\`${summary.feature.name}\`](${commitUrl}/${summary.feature.ref})`;
+  const diffUrl = `https://github.com/${repo}/compare/${summary.baseline.ref}...${summary.feature.ref}`;
+
+  // Header & metadata
+  const metaParts = [];
+  if (prNumber) metaParts.push(`**[PR #${prNumber}](https://github.com/${repo}/pull/${prNumber})**`);
+  metaParts.push(`triggered by @${actor}`);
+
+  let md = `# ${emoji} ${label}\n\n`;
+  md += metaParts.join(' · ') + '\n\n';
+  md += `**Baseline:** ${baselineLink}\n`;
+  md += `**Feature:** ${featureLink} ([diff](${diffUrl}))\n`;
+  md += blocksLabel(summary).map(p => `**${p.key}:** ${p.value}`).join(' · ') + '\n\n';
+
+  // Main comparison table
+  const rows = metricRows(summary);
+  md += `| Metric | Baseline | Feature | Change |\n`;
+  md += `|--------|----------|---------|--------|\n`;
+  for (const r of rows) {
+    md += `| ${r.label} | ${r.baseline} | ${r.feature} | ${r.change} |\n`;
+  }
+  md += '\n';
+
+  // Wait time breakdown
+  const wtRows = waitTimeRows(summary);
+  if (wtRows.length > 0) {
+    md += `### Wait Time Breakdown\n\n`;
+    md += `| Metric | Baseline | Feature |\n`;
+    md += `|--------|----------|--------|\n`;
+    for (const r of wtRows) {
+      md += `| ${r.title} | ${r.baseline} | ${r.feature} |\n`;
+    }
+    md += '\n';
+  }
+
+  // Charts
+  if (chartSha) {
+    const prNum = prNumber || '0';
+    const baseUrl = `https://raw.githubusercontent.com/decofe/reth-bench-charts/${chartSha}/pr/${prNum}/${runId}`;
+    const charts = [
+      { file: 'latency_throughput.png', label: 'Latency, Throughput & Diff' },
+      { file: 'wait_breakdown.png', label: 'Wait Time Breakdown' },
+      { file: 'gas_vs_latency.png', label: 'Gas vs Latency' },
+    ];
+    md += `### Charts\n\n`;
+    for (const chart of charts) {
+      md += `<details><summary>${chart.label}</summary>\n\n`;
+      md += `![${chart.label}](${baseUrl}/${chart.file})\n\n`;
+      md += `</details>\n\n`;
+    }
+  }
+
+  // Samply profiles
+  const samplyUrls = loadSamplyUrls(process.env.BENCH_WORK_DIR);
+  const samplyLinks = Object.entries(samplyUrls).map(([run, url]) => `- **${run}**: [Firefox Profiler](${url})`);
+  if (samplyLinks.length > 0) {
+    md += `### Samply Profiles\n\n${samplyLinks.join('\n')}\n\n`;
+  }
+
+  // Grafana
+  if (grafanaUrl) {
+    md += `### Grafana Dashboard\n\n[View real-time metrics](${grafanaUrl})\n\n`;
+  }
+
+  // Node errors
+  try {
+    const errors = fs.readFileSync(process.env.BENCH_WORK_DIR + '/errors.md', 'utf8');
+    if (errors.trim()) md += '\n' + errors + '\n';
+  } catch {}
+
+  await core.summary.addRaw(md).write();
+};
diff --git a/.github/scripts/bench-slack-notify.js b/.github/scripts/bench-slack-notify.js
index ce91c01abe..89a9d0d357 100644
--- a/.github/scripts/bench-slack-notify.js
+++ b/.github/scripts/bench-slack-notify.js
@@ -18,6 +18,7 @@
 
 const fs = require('fs');
 const path = require('path');
+const { fmtChange, fmtMs, verdict, loadSamplyUrls, blocksLabel, metricRows, waitTimeRows } = require('./bench-utils');
 
 const SLACK_API = 'https://slack.com/api/chat.postMessage';
 
@@ -61,41 +62,17 @@ function cell(text) {
   return { type: 'raw_text', text: s || ' ' };
 }
 
+// Slack shortcodes for verdict (Block Kit header doesn't support unicode emoji)
+const SLACK_VERDICT = {
+  '⚠️': ':warning:',
+  '❌': ':x:',
+  '✅': ':white_check_mark:',
+  '⚪': ':white_circle:',
+};
+
 function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, repo, samplyUrls }) {
-  const b = summary.baseline.stats;
-  const f = summary.feature.stats;
-  const c = summary.changes;
-
-  const sigEmoji = { good: '\u2705', bad: '\u274c', neutral: '\u26aa' };
-
-  function fmtMs(v) { return v.toFixed(2) + 'ms'; }
-  function fmtMgas(v) { return v.toFixed(2); }
-  function fmtS(v) { return v.toFixed(2) + 's'; }
-  function fmtChange(ch) {
-    if (!ch.pct && !ch.ci_pct) return ' ';
-    const pctStr = `${ch.pct >= 0 ? '+' : ''}${ch.pct.toFixed(2)}%`;
-    const ciStr = ch.ci_pct ? ` (\u00b1${ch.ci_pct.toFixed(2)}%)` : '';
-    return `${pctStr}${ciStr} ${sigEmoji[ch.sig]}`;
-  }
-
-  // Overall result for header
-  const vals = Object.values(c);
-  const hasBad = vals.some(v => v.sig === 'bad');
-  const hasGood = vals.some(v => v.sig === 'good');
-  let headerEmoji, headerResult;
-  if (hasBad && hasGood) {
-    headerEmoji = ':warning:';
-    headerResult = 'Mixed Results';
-  } else if (hasBad) {
-    headerEmoji = ':x:';
-    headerResult = 'Regression';
-  } else if (hasGood) {
-    headerEmoji = ':white_check_mark:';
-    headerResult = 'Improvement';
-  } else {
-    headerEmoji = ':white_circle:';
-    headerResult = 'No Difference';
-  }
+  const { emoji, label } = verdict(summary.changes);
+  const headerEmoji = SLACK_VERDICT[emoji] || emoji;
 
   const prUrl = prNumber ? `https://github.com/${repo}/pull/${prNumber}` : '';
   const commitUrl = `https://github.com/${repo}/commit`;
@@ -120,19 +97,7 @@ function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, re
   if (fl1) featureLine += ` | <${fl1}|Samply 1>`;
   if (fl2) featureLine += ` | <${fl2}|Samply 2>`;
 
-  const cores = process.env.BENCH_CORES || '0';
-  const countsParts = [];
-  if (summary.big_blocks) {
-    const gasRamp = summary.gas_ramp_blocks || 0;
-    if (gasRamp > 0) countsParts.push(`*Gas Ramp:* ${gasRamp}`);
-    countsParts.push(`*Big Blocks:* ${summary.blocks}`);
-  } else {
-    const warmup = summary.warmup_blocks || process.env.BENCH_WARMUP_BLOCKS || '';
-    if (warmup) countsParts.push(`*Warmup:* ${warmup}`);
-    countsParts.push(`*Blocks:* ${summary.blocks}`);
-  }
-  if (cores !== '0') countsParts.push(`*Cores:* ${cores}`);
-  const countsLine = countsParts.join(' | ');
+  const countsLine = blocksLabel(summary).map(p => `*${p.key}:* ${p.value}`).join(' | ');
 
   const baselineArgs = process.env.BENCH_BASELINE_ARGS || '';
   const featureArgs = process.env.BENCH_FEATURE_ARGS || '';
@@ -159,10 +124,17 @@ function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, re
     },
   ];
 
+  // Build table rows from shared metricRows
+  const rows = metricRows(summary);
+  const tableRows = [
+    [cell('Metric'), cell('Baseline'), cell('Feature'), cell('Change')],
+    ...rows.map(r => [cell(r.label), cell(r.baseline), cell(r.feature), cell(r.change || ' ')]),
+  ];
+
   const blocks = [
     {
       type: 'header',
-      text: { type: 'plain_text', text: `${headerEmoji} ${headerResult}`, emoji: true },
+      text: { type: 'plain_text', text: `${headerEmoji} ${label}`, emoji: true },
     },
     {
       type: 'section',
@@ -176,16 +148,7 @@ function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, re
         { align: 'right' },
         { align: 'right' },
       ],
-      rows: [
-        [cell('Metric'),  cell('Baseline'), cell('Feature'), cell('Change')],
-        [cell('Mean'),     cell(fmtMs(b.mean_ms)),      cell(fmtMs(f.mean_ms)),      cell(fmtChange(c.mean))],
-        [cell('StdDev'),   cell(fmtMs(b.stddev_ms)),    cell(fmtMs(f.stddev_ms)),    cell(' ')],
-        [cell('P50'),      cell(fmtMs(b.p50_ms)),       cell(fmtMs(f.p50_ms)),       cell(fmtChange(c.p50))],
-        [cell('P90'),      cell(fmtMs(b.p90_ms)),       cell(fmtMs(f.p90_ms)),       cell(fmtChange(c.p90))],
-        [cell('P99'),      cell(fmtMs(b.p99_ms)),       cell(fmtMs(f.p99_ms)),       cell(fmtChange(c.p99))],
-        [cell('Mgas/s'),   cell(fmtMgas(b.mean_mgas_s)), cell(fmtMgas(f.mean_mgas_s)), cell(fmtChange(c.mgas_s))],
-        [cell('Wall Clock'), cell(fmtS(b.wall_clock_s)), cell(fmtS(f.wall_clock_s)), cell(fmtChange(c.wall_clock))],
-      ],
+      rows: tableRows,
     },
     {
       type: 'actions',
@@ -195,16 +158,12 @@ function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, re
 
   // Wait times as a separate table block (sent as threaded reply due to Slack one-table limit)
   const threadBlocks = [];
-  const waitTimes = summary.wait_times || {};
-  const waitKeys = Object.keys(waitTimes);
-  if (waitKeys.length > 0) {
-    const waitRows = [
+  const wtRows = waitTimeRows(summary);
+  if (wtRows.length > 0) {
+    const waitTableRows = [
       [cell('Wait Time'), cell('Baseline'), cell('Feature')],
+      ...wtRows.map(r => [cell(r.title), cell(r.baseline), cell(r.feature)]),
     ];
-    for (const key of waitKeys) {
-      const wt = waitTimes[key];
-      waitRows.push([cell(wt.title), cell(fmtMs(wt.baseline.mean_ms)), cell(fmtMs(wt.feature.mean_ms))]);
-    }
     threadBlocks.push({
       type: 'table',
       column_settings: [
@@ -212,7 +171,7 @@ function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, re
         { align: 'right' },
         { align: 'right' },
       ],
-      rows: waitRows,
+      rows: waitTableRows,
     });
   }
 
@@ -274,16 +233,7 @@ async function success({ core, context }) {
   const jobUrl = process.env.BENCH_JOB_URL ||
     `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
 
-  // Load samply profile URLs (files exist when samply profiling was enabled)
-  const samplyUrls = {};
-  for (const run of ['baseline-1', 'baseline-2', 'feature-1', 'feature-2']) {
-    try {
-      const url = fs.readFileSync(
-        path.join(process.env.BENCH_WORK_DIR, run, 'samply-profile-url.txt'), 'utf8'
-      ).trim();
-      if (url) samplyUrls[run] = url;
-    } catch {}
-  }
+  const samplyUrls = loadSamplyUrls(process.env.BENCH_WORK_DIR);
 
   const slackUsers = loadSlackUsers(process.env.GITHUB_WORKSPACE || '.');
   const actorSlackId = slackUsers[actor];
diff --git a/.github/scripts/bench-utils.js b/.github/scripts/bench-utils.js
new file mode 100644
index 0000000000..19d6b27f3f
--- /dev/null
+++ b/.github/scripts/bench-utils.js
@@ -0,0 +1,96 @@
+// Shared utilities for reth-bench result rendering.
+//
+// Used by bench-job-summary.js and bench-slack-notify.js.
+
+const fs = require('fs');
+const path = require('path');
+
+const SIG_EMOJI = { good: '✅', bad: '❌', neutral: '⚪' };
+
+function fmtMs(v) { return v.toFixed(2) + 'ms'; }
+function fmtMgas(v) { return v.toFixed(2); }
+function fmtS(v) { return v.toFixed(2) + 's'; }
+
+function fmtChange(ch) {
+  if (!ch || (!ch.pct && !ch.ci_pct)) return '';
+  const pctStr = `${ch.pct >= 0 ? '+' : ''}${ch.pct.toFixed(2)}%`;
+  const ciStr = ch.ci_pct ? ` (±${ch.ci_pct.toFixed(2)}%)` : '';
+  return `${pctStr}${ciStr} ${SIG_EMOJI[ch.sig]}`;
+}
+
+function verdict(changes) {
+  const vals = Object.values(changes);
+  const hasBad = vals.some(v => v.sig === 'bad');
+  const hasGood = vals.some(v => v.sig === 'good');
+  if (hasBad && hasGood) return { emoji: '⚠️', label: 'Mixed Results' };
+  if (hasBad) return { emoji: '❌', label: 'Regression' };
+  if (hasGood) return { emoji: '✅', label: 'Improvement' };
+  return { emoji: '⚪', label: 'No Difference' };
+}
+
+function loadSamplyUrls(workDir) {
+  const urls = {};
+  for (const run of ['baseline-1', 'baseline-2', 'feature-1', 'feature-2']) {
+    try {
+      const url = fs.readFileSync(path.join(workDir, run, 'samply-profile-url.txt'), 'utf8').trim();
+      if (url) urls[run] = url;
+    } catch {}
+  }
+  return urls;
+}
+
+function blocksLabel(summary) {
+  const parts = [];
+  if (summary.big_blocks) {
+    if (summary.gas_ramp_blocks) parts.push({ key: 'Gas Ramp', value: summary.gas_ramp_blocks });
+    parts.push({ key: 'Big Blocks', value: summary.blocks });
+  } else {
+    const warmup = summary.warmup_blocks || process.env.BENCH_WARMUP_BLOCKS || '';
+    if (warmup) parts.push({ key: 'Warmup', value: warmup });
+    parts.push({ key: 'Blocks', value: summary.blocks });
+  }
+  const cores = process.env.BENCH_CORES || '0';
+  if (cores !== '0') parts.push({ key: 'Cores', value: cores });
+  return parts;
+}
+
+// The 7 metric rows shared by all renderers.
+// Returns an array of { label, baseline, feature, change } objects.
+function metricRows(summary) {
+  const b = summary.baseline.stats;
+  const f = summary.feature.stats;
+  const c = summary.changes;
+  return [
+    { label: 'Mean',       baseline: fmtMs(b.mean_ms),       feature: fmtMs(f.mean_ms),       change: fmtChange(c.mean) },
+    { label: 'StdDev',     baseline: fmtMs(b.stddev_ms),     feature: fmtMs(f.stddev_ms),     change: '' },
+    { label: 'P50',        baseline: fmtMs(b.p50_ms),        feature: fmtMs(f.p50_ms),        change: fmtChange(c.p50) },
+    { label: 'P90',        baseline: fmtMs(b.p90_ms),        feature: fmtMs(f.p90_ms),        change: fmtChange(c.p90) },
+    { label: 'P99',        baseline: fmtMs(b.p99_ms),        feature: fmtMs(f.p99_ms),        change: fmtChange(c.p99) },
+    { label: 'Mgas/s',     baseline: fmtMgas(b.mean_mgas_s), feature: fmtMgas(f.mean_mgas_s), change: fmtChange(c.mgas_s) },
+    { label: 'Wall Clock', baseline: fmtS(b.wall_clock_s),   feature: fmtS(f.wall_clock_s),   change: fmtChange(c.wall_clock) },
+  ];
+}
+
+// Wait time rows: one row per metric showing mean values.
+function waitTimeRows(summary) {
+  const waitTimes = summary.wait_times || {};
+  const rows = [];
+  for (const key of Object.keys(waitTimes)) {
+    const wt = waitTimes[key];
+    rows.push({ title: wt.title, baseline: fmtMs(wt.baseline.mean_ms), feature: fmtMs(wt.feature.mean_ms) });
+  }
+  return rows;
+}
+
+module.exports = {
+  SIG_EMOJI,
+  fmtMs,
+  fmtMgas,
+  fmtS,
+  fmtChange,
+  verdict,
+  loadSamplyUrls,
+  blocksLabel,
+  metricRows,
+  waitTimeRows,
+};
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index ad14effba6..f0ad2925ed 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -1195,11 +1195,22 @@ jobs:
                 comment_id: parseInt(ackCommentId),
                 body,
               });
-            } else {
-              // No PR — write results to job summary
-              await core.summary.addRaw(body).write();
             }
 
+      - name: Write job summary
+        if: success()
+        uses: actions/github-script@v8
+        with:
+          script: |
+            const jobSummary = require('./.github/scripts/bench-job-summary.js');
+            await jobSummary({
+              core,
+              context,
+              chartSha: '${{ steps.push-charts.outputs.sha }}',
+              grafanaUrl: '${{ steps.metrics.outputs.grafana-url }}',
+              runId: '${{ github.run_id }}',
+            });
+
       - name: Send Slack notification (success)
         if: success() && env.BENCH_NO_SLACK != 'true'
         uses: actions/github-script@v8