From acdbd065e2a583d4c2908bc5543215203e370553 Mon Sep 17 00:00:00 2001 From: Derek Cofausper <256792747+decofe@users.noreply.github.com> Date: Mon, 16 Mar 2026 03:12:35 -0700 Subject: [PATCH] chore(bench): add rich job summary matching Slack output (#23046) Co-authored-by: Sergei Shulepov <2205845+pepyakin@users.noreply.github.com> --- .github/scripts/bench-job-summary.js | 106 ++++++++++++++++++++++++++ .github/scripts/bench-slack-notify.js | 104 +++++++------------------ .github/scripts/bench-utils.js | 96 +++++++++++++++++++++++ .github/workflows/bench.yml | 17 ++++- 4 files changed, 243 insertions(+), 80 deletions(-) create mode 100644 .github/scripts/bench-job-summary.js create mode 100644 .github/scripts/bench-utils.js diff --git a/.github/scripts/bench-job-summary.js b/.github/scripts/bench-job-summary.js new file mode 100644 index 0000000000..21caf88c7d --- /dev/null +++ b/.github/scripts/bench-job-summary.js @@ -0,0 +1,106 @@ +// Generates a rich GitHub Actions job summary for reth-bench results. +// +// Reads from environment: +// BENCH_WORK_DIR – Directory containing summary.json +// BENCH_PR – PR number (may be empty) +// BENCH_ACTOR – GitHub user who triggered the bench +// BENCH_CORES – CPU core limit (0 = all) +// BENCH_WARMUP_BLOCKS – Number of warmup blocks +// BENCH_SAMPLY – 'true' if samply profiling was enabled +// BENCH_ABBA – 'true' if ABBA interleaved order was used +// +// Usage from actions/github-script: +// const jobSummary = require('./.github/scripts/bench-job-summary.js'); +// await jobSummary({ core, context, chartSha, grafanaUrl, runId }); + +const fs = require('fs'); +const { verdict, loadSamplyUrls, blocksLabel, metricRows, waitTimeRows } = require('./bench-utils'); + +module.exports = async function ({ core, context, chartSha, grafanaUrl, runId }) { + let summary; + try { + summary = JSON.parse(fs.readFileSync(process.env.BENCH_WORK_DIR + '/summary.json', 'utf8')); + } catch (e) { + await core.summary.addRaw('⚠️ Benchmark completed but failed to load summary.').write(); + return; + } + + const repo = `${context.repo.owner}/${context.repo.repo}`; + const prNumber = process.env.BENCH_PR; + const actor = process.env.BENCH_ACTOR; + const commitUrl = `https://github.com/${repo}/commit`; + + const { emoji, label } = verdict(summary.changes); + const baselineLink = `[\`${summary.baseline.name}\`](${commitUrl}/${summary.baseline.ref})`; + const featureLink = `[\`${summary.feature.name}\`](${commitUrl}/${summary.feature.ref})`; + const diffUrl = `https://github.com/${repo}/compare/${summary.baseline.ref}...${summary.feature.ref}`; + + // Header & metadata + const metaParts = []; + if (prNumber) metaParts.push(`**[PR #${prNumber}](https://github.com/${repo}/pull/${prNumber})**`); + metaParts.push(`triggered by @${actor}`); + + let md = `# ${emoji} ${label}\n\n`; + md += metaParts.join(' · ') + '\n\n'; + md += `**Baseline:** ${baselineLink}\n`; + md += `**Feature:** ${featureLink} ([diff](${diffUrl}))\n`; + md += blocksLabel(summary).map(p => `**${p.key}:** ${p.value}`).join(' · ') + '\n\n'; + + // Main comparison table + const rows = metricRows(summary); + md += `| Metric | Baseline | Feature | Change |\n`; + md += `|--------|----------|---------|--------|\n`; + for (const r of rows) { + md += `| ${r.label} | ${r.baseline} | ${r.feature} | ${r.change} |\n`; + } + md += '\n'; + + // Wait time breakdown + const wtRows = waitTimeRows(summary); + if (wtRows.length > 0) { + md += `### Wait Time Breakdown\n\n`; + md += `| Metric | Baseline | Feature |\n`; + md += `|--------|----------|--------|\n`; + for (const r of wtRows) { + md += `| ${r.title} | ${r.baseline} | ${r.feature} |\n`; + } + md += '\n'; + } + + // Charts + if (chartSha) { + const prNum = prNumber || '0'; + const baseUrl = `https://raw.githubusercontent.com/decofe/reth-bench-charts/${chartSha}/pr/${prNum}/${runId}`; + const charts = [ + { file: 'latency_throughput.png', label: 'Latency, Throughput & Diff' }, + { file: 'wait_breakdown.png', label: 'Wait Time Breakdown' }, + { file: 'gas_vs_latency.png', label: 'Gas vs Latency' }, + ]; + md += `### Charts\n\n`; + for (const chart of charts) { + md += `
${chart.label}\n\n`; + md += `![${chart.label}](${baseUrl}/${chart.file})\n\n`; + md += `
\n\n`; + } + } + + // Samply profiles + const samplyUrls = loadSamplyUrls(process.env.BENCH_WORK_DIR); + const samplyLinks = Object.entries(samplyUrls).map(([run, url]) => `- **${run}**: [Firefox Profiler](${url})`); + if (samplyLinks.length > 0) { + md += `### Samply Profiles\n\n${samplyLinks.join('\n')}\n\n`; + } + + // Grafana + if (grafanaUrl) { + md += `### Grafana Dashboard\n\n[View real-time metrics](${grafanaUrl})\n\n`; + } + + // Node errors + try { + const errors = fs.readFileSync(process.env.BENCH_WORK_DIR + '/errors.md', 'utf8'); + if (errors.trim()) md += '\n' + errors + '\n'; + } catch {} + + await core.summary.addRaw(md).write(); +}; diff --git a/.github/scripts/bench-slack-notify.js b/.github/scripts/bench-slack-notify.js index ce91c01abe..89a9d0d357 100644 --- a/.github/scripts/bench-slack-notify.js +++ b/.github/scripts/bench-slack-notify.js @@ -18,6 +18,7 @@ const fs = require('fs'); const path = require('path'); +const { fmtChange, fmtMs, verdict, loadSamplyUrls, blocksLabel, metricRows, waitTimeRows } = require('./bench-utils'); const SLACK_API = 'https://slack.com/api/chat.postMessage'; @@ -61,41 +62,17 @@ function cell(text) { return { type: 'raw_text', text: s || ' ' }; } +// Slack shortcodes for verdict (Block Kit header doesn't support unicode emoji) +const SLACK_VERDICT = { + '⚠️': ':warning:', + '❌': ':x:', + '✅': ':white_check_mark:', + '⚪': ':white_circle:', +}; + function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, repo, samplyUrls }) { - const b = summary.baseline.stats; - const f = summary.feature.stats; - const c = summary.changes; - - const sigEmoji = { good: '\u2705', bad: '\u274c', neutral: '\u26aa' }; - - function fmtMs(v) { return v.toFixed(2) + 'ms'; } - function fmtMgas(v) { return v.toFixed(2); } - function fmtS(v) { return v.toFixed(2) + 's'; } - function fmtChange(ch) { - if (!ch.pct && !ch.ci_pct) return ' '; - const pctStr = `${ch.pct >= 0 ? '+' : ''}${ch.pct.toFixed(2)}%`; - const ciStr = ch.ci_pct ? ` (\u00b1${ch.ci_pct.toFixed(2)}%)` : ''; - return `${pctStr}${ciStr} ${sigEmoji[ch.sig]}`; - } - - // Overall result for header - const vals = Object.values(c); - const hasBad = vals.some(v => v.sig === 'bad'); - const hasGood = vals.some(v => v.sig === 'good'); - let headerEmoji, headerResult; - if (hasBad && hasGood) { - headerEmoji = ':warning:'; - headerResult = 'Mixed Results'; - } else if (hasBad) { - headerEmoji = ':x:'; - headerResult = 'Regression'; - } else if (hasGood) { - headerEmoji = ':white_check_mark:'; - headerResult = 'Improvement'; - } else { - headerEmoji = ':white_circle:'; - headerResult = 'No Difference'; - } + const { emoji, label } = verdict(summary.changes); + const headerEmoji = SLACK_VERDICT[emoji] || emoji; const prUrl = prNumber ? `https://github.com/${repo}/pull/${prNumber}` : ''; const commitUrl = `https://github.com/${repo}/commit`; @@ -120,19 +97,7 @@ function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, re if (fl1) featureLine += ` | <${fl1}|Samply 1>`; if (fl2) featureLine += ` | <${fl2}|Samply 2>`; - const cores = process.env.BENCH_CORES || '0'; - const countsParts = []; - if (summary.big_blocks) { - const gasRamp = summary.gas_ramp_blocks || 0; - if (gasRamp > 0) countsParts.push(`*Gas Ramp:* ${gasRamp}`); - countsParts.push(`*Big Blocks:* ${summary.blocks}`); - } else { - const warmup = summary.warmup_blocks || process.env.BENCH_WARMUP_BLOCKS || ''; - if (warmup) countsParts.push(`*Warmup:* ${warmup}`); - countsParts.push(`*Blocks:* ${summary.blocks}`); - } - if (cores !== '0') countsParts.push(`*Cores:* ${cores}`); - const countsLine = countsParts.join(' | '); + const countsLine = blocksLabel(summary).map(p => `*${p.key}:* ${p.value}`).join(' | '); const baselineArgs = process.env.BENCH_BASELINE_ARGS || ''; const featureArgs = process.env.BENCH_FEATURE_ARGS || ''; @@ -159,10 +124,17 @@ function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, re }, ]; + // Build table rows from shared metricRows + const rows = metricRows(summary); + const tableRows = [ + [cell('Metric'), cell('Baseline'), cell('Feature'), cell('Change')], + ...rows.map(r => [cell(r.label), cell(r.baseline), cell(r.feature), cell(r.change || ' ')]), + ]; + const blocks = [ { type: 'header', - text: { type: 'plain_text', text: `${headerEmoji} ${headerResult}`, emoji: true }, + text: { type: 'plain_text', text: `${headerEmoji} ${label}`, emoji: true }, }, { type: 'section', @@ -176,16 +148,7 @@ function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, re { align: 'right' }, { align: 'right' }, ], - rows: [ - [cell('Metric'), cell('Baseline'), cell('Feature'), cell('Change')], - [cell('Mean'), cell(fmtMs(b.mean_ms)), cell(fmtMs(f.mean_ms)), cell(fmtChange(c.mean))], - [cell('StdDev'), cell(fmtMs(b.stddev_ms)), cell(fmtMs(f.stddev_ms)), cell(' ')], - [cell('P50'), cell(fmtMs(b.p50_ms)), cell(fmtMs(f.p50_ms)), cell(fmtChange(c.p50))], - [cell('P90'), cell(fmtMs(b.p90_ms)), cell(fmtMs(f.p90_ms)), cell(fmtChange(c.p90))], - [cell('P99'), cell(fmtMs(b.p99_ms)), cell(fmtMs(f.p99_ms)), cell(fmtChange(c.p99))], - [cell('Mgas/s'), cell(fmtMgas(b.mean_mgas_s)), cell(fmtMgas(f.mean_mgas_s)), cell(fmtChange(c.mgas_s))], - [cell('Wall Clock'), cell(fmtS(b.wall_clock_s)), cell(fmtS(f.wall_clock_s)), cell(fmtChange(c.wall_clock))], - ], + rows: tableRows, }, { type: 'actions', @@ -195,16 +158,12 @@ function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, re // Wait times as a separate table block (sent as threaded reply due to Slack one-table limit) const threadBlocks = []; - const waitTimes = summary.wait_times || {}; - const waitKeys = Object.keys(waitTimes); - if (waitKeys.length > 0) { - const waitRows = [ + const wtRows = waitTimeRows(summary); + if (wtRows.length > 0) { + const waitTableRows = [ [cell('Wait Time'), cell('Baseline'), cell('Feature')], + ...wtRows.map(r => [cell(r.title), cell(r.baseline), cell(r.feature)]), ]; - for (const key of waitKeys) { - const wt = waitTimes[key]; - waitRows.push([cell(wt.title), cell(fmtMs(wt.baseline.mean_ms)), cell(fmtMs(wt.feature.mean_ms))]); - } threadBlocks.push({ type: 'table', column_settings: [ @@ -212,7 +171,7 @@ function buildSuccessBlocks({ summary, prNumber, actor, actorSlackId, jobUrl, re { align: 'right' }, { align: 'right' }, ], - rows: waitRows, + rows: waitTableRows, }); } @@ -274,16 +233,7 @@ async function success({ core, context }) { const jobUrl = process.env.BENCH_JOB_URL || `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; - // Load samply profile URLs (files exist when samply profiling was enabled) - const samplyUrls = {}; - for (const run of ['baseline-1', 'baseline-2', 'feature-1', 'feature-2']) { - try { - const url = fs.readFileSync( - path.join(process.env.BENCH_WORK_DIR, run, 'samply-profile-url.txt'), 'utf8' - ).trim(); - if (url) samplyUrls[run] = url; - } catch {} - } + const samplyUrls = loadSamplyUrls(process.env.BENCH_WORK_DIR); const slackUsers = loadSlackUsers(process.env.GITHUB_WORKSPACE || '.'); const actorSlackId = slackUsers[actor]; diff --git a/.github/scripts/bench-utils.js b/.github/scripts/bench-utils.js new file mode 100644 index 0000000000..19d6b27f3f --- /dev/null +++ b/.github/scripts/bench-utils.js @@ -0,0 +1,96 @@ +// Shared utilities for reth-bench result rendering. +// +// Used by bench-job-summary.js and bench-slack-notify.js. + +const fs = require('fs'); +const path = require('path'); + +const SIG_EMOJI = { good: '✅', bad: '❌', neutral: '⚪' }; + +function fmtMs(v) { return v.toFixed(2) + 'ms'; } +function fmtMgas(v) { return v.toFixed(2); } +function fmtS(v) { return v.toFixed(2) + 's'; } + +function fmtChange(ch) { + if (!ch || (!ch.pct && !ch.ci_pct)) return ''; + const pctStr = `${ch.pct >= 0 ? '+' : ''}${ch.pct.toFixed(2)}%`; + const ciStr = ch.ci_pct ? ` (±${ch.ci_pct.toFixed(2)}%)` : ''; + return `${pctStr}${ciStr} ${SIG_EMOJI[ch.sig]}`; +} + +function verdict(changes) { + const vals = Object.values(changes); + const hasBad = vals.some(v => v.sig === 'bad'); + const hasGood = vals.some(v => v.sig === 'good'); + if (hasBad && hasGood) return { emoji: '⚠️', label: 'Mixed Results' }; + if (hasBad) return { emoji: '❌', label: 'Regression' }; + if (hasGood) return { emoji: '✅', label: 'Improvement' }; + return { emoji: '⚪', label: 'No Difference' }; +} + +function loadSamplyUrls(workDir) { + const urls = {}; + for (const run of ['baseline-1', 'baseline-2', 'feature-1', 'feature-2']) { + try { + const url = fs.readFileSync(path.join(workDir, run, 'samply-profile-url.txt'), 'utf8').trim(); + if (url) urls[run] = url; + } catch {} + } + return urls; +} + +function blocksLabel(summary) { + const parts = []; + if (summary.big_blocks) { + if (summary.gas_ramp_blocks) parts.push({ key: 'Gas Ramp', value: summary.gas_ramp_blocks }); + parts.push({ key: 'Big Blocks', value: summary.blocks }); + } else { + const warmup = summary.warmup_blocks || process.env.BENCH_WARMUP_BLOCKS || ''; + if (warmup) parts.push({ key: 'Warmup', value: warmup }); + parts.push({ key: 'Blocks', value: summary.blocks }); + } + const cores = process.env.BENCH_CORES || '0'; + if (cores !== '0') parts.push({ key: 'Cores', value: cores }); + return parts; +} + +// The 7 metric rows shared by all renderers. +// Returns an array of { label, baseline, feature, change } objects. +function metricRows(summary) { + const b = summary.baseline.stats; + const f = summary.feature.stats; + const c = summary.changes; + return [ + { label: 'Mean', baseline: fmtMs(b.mean_ms), feature: fmtMs(f.mean_ms), change: fmtChange(c.mean) }, + { label: 'StdDev', baseline: fmtMs(b.stddev_ms), feature: fmtMs(f.stddev_ms), change: '' }, + { label: 'P50', baseline: fmtMs(b.p50_ms), feature: fmtMs(f.p50_ms), change: fmtChange(c.p50) }, + { label: 'P90', baseline: fmtMs(b.p90_ms), feature: fmtMs(f.p90_ms), change: fmtChange(c.p90) }, + { label: 'P99', baseline: fmtMs(b.p99_ms), feature: fmtMs(f.p99_ms), change: fmtChange(c.p99) }, + { label: 'Mgas/s', baseline: fmtMgas(b.mean_mgas_s), feature: fmtMgas(f.mean_mgas_s), change: fmtChange(c.mgas_s) }, + { label: 'Wall Clock', baseline: fmtS(b.wall_clock_s), feature: fmtS(f.wall_clock_s), change: fmtChange(c.wall_clock) }, + ]; +} + +// Wait time rows: one row per metric showing mean values. +function waitTimeRows(summary) { + const waitTimes = summary.wait_times || {}; + const rows = []; + for (const key of Object.keys(waitTimes)) { + const wt = waitTimes[key]; + rows.push({ title: wt.title, baseline: fmtMs(wt.baseline.mean_ms), feature: fmtMs(wt.feature.mean_ms) }); + } + return rows; +} + +module.exports = { + SIG_EMOJI, + fmtMs, + fmtMgas, + fmtS, + fmtChange, + verdict, + loadSamplyUrls, + blocksLabel, + metricRows, + waitTimeRows, +}; diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index ad14effba6..f0ad2925ed 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -1195,11 +1195,22 @@ jobs: comment_id: parseInt(ackCommentId), body, }); - } else { - // No PR — write results to job summary - await core.summary.addRaw(body).write(); } + - name: Write job summary + if: success() + uses: actions/github-script@v8 + with: + script: | + const jobSummary = require('./.github/scripts/bench-job-summary.js'); + await jobSummary({ + core, + context, + chartSha: '${{ steps.push-charts.outputs.sha }}', + grafanaUrl: '${{ steps.metrics.outputs.grafana-url }}', + runId: '${{ github.run_id }}', + }); + - name: Send Slack notification (success) if: success() && env.BENCH_NO_SLACK != 'true' uses: actions/github-script@v8