mirror of
https://github.com/paradigmxyz/reth.git
synced 2026-02-19 03:04:27 -05:00
ci(bench): download snapshot in parallel with builds (#22332)
This commit is contained in:
125
.github/scripts/bench-reth-snapshot.sh
vendored
Executable file
125
.github/scripts/bench-reth-snapshot.sh
vendored
Executable file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Downloads the latest nightly snapshot into the schelk volume with
|
||||
# progress reporting to the GitHub PR comment.
|
||||
#
|
||||
# Skips the download if the local ETag marker matches the remote one.
|
||||
#
|
||||
# Usage: bench-reth-snapshot.sh [--check]
|
||||
# --check Only check if a download is needed; exits 0 if up-to-date, 1 if not.
|
||||
#
|
||||
# Required env:
|
||||
# SCHELK_MOUNT – schelk mount point (e.g. /reth-bench)
|
||||
# GITHUB_TOKEN – token for GitHub API calls (only for download)
|
||||
# BENCH_COMMENT_ID – PR comment ID to update (optional)
|
||||
# BENCH_REPO – owner/repo (e.g. paradigmxyz/reth)
|
||||
# BENCH_JOB_URL – link to the Actions job
|
||||
# BENCH_ACTOR – user who triggered the benchmark
|
||||
# BENCH_CONFIG – config summary line
|
||||
set -euo pipefail
|
||||
|
||||
BUCKET="minio/reth-snapshots/reth-1-minimal-nightly-previous.tar.zst"
|
||||
DATADIR="$SCHELK_MOUNT/datadir"
|
||||
ETAG_FILE="$HOME/.reth-bench-snapshot-etag"
|
||||
|
||||
# Get remote metadata via JSON for reliable parsing
|
||||
MC_STAT=$(mc stat --json "$BUCKET" 2>/dev/null || true)
|
||||
REMOTE_ETAG=$(echo "$MC_STAT" | jq -r '.etag // empty')
|
||||
if [ -z "$REMOTE_ETAG" ]; then
|
||||
echo "::warning::Failed to get ETag from mc stat, will re-download"
|
||||
REMOTE_ETAG="unknown-$(date +%s)"
|
||||
fi
|
||||
|
||||
LOCAL_ETAG=""
|
||||
[ -f "$ETAG_FILE" ] && LOCAL_ETAG=$(cat "$ETAG_FILE")
|
||||
|
||||
if [ "$REMOTE_ETAG" = "$LOCAL_ETAG" ]; then
|
||||
echo "Snapshot is up-to-date (ETag: ${REMOTE_ETAG})"
|
||||
if [ "${1:-}" = "--check" ]; then
|
||||
exit 0
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Snapshot needs update (local: ${LOCAL_ETAG:-<none>}, remote: ${REMOTE_ETAG})"
|
||||
if [ "${1:-}" = "--check" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get compressed size for progress tracking
|
||||
TOTAL_BYTES=$(echo "$MC_STAT" | jq -r '.size // empty')
|
||||
if [ -z "$TOTAL_BYTES" ] || [ "$TOTAL_BYTES" = "0" ]; then
|
||||
echo "::error::Failed to get snapshot size from mc stat"
|
||||
exit 1
|
||||
fi
|
||||
echo "Snapshot size: $TOTAL_BYTES bytes ($(numfmt --to=iec "$TOTAL_BYTES"))"
|
||||
|
||||
# Prepare mount
|
||||
mountpoint -q "$SCHELK_MOUNT" && sudo schelk recover -y || true
|
||||
sudo schelk mount -y
|
||||
sudo rm -rf "$DATADIR"
|
||||
sudo mkdir -p "$DATADIR"
|
||||
|
||||
update_comment() {
|
||||
local pct="$1"
|
||||
[ -z "${BENCH_COMMENT_ID:-}" ] && return 0
|
||||
local status="Building binaries & downloading snapshot… ${pct}%"
|
||||
local body
|
||||
body="$(printf 'cc @%s\n\n🚀 Benchmark started! [View job](%s)\n\n⏳ **Status:** %s\n\n%s' \
|
||||
"$BENCH_ACTOR" "$BENCH_JOB_URL" "$status" "$BENCH_CONFIG")"
|
||||
curl -sf -X PATCH \
|
||||
-H "Authorization: token ${GITHUB_TOKEN}" \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
"https://api.github.com/repos/${BENCH_REPO}/issues/comments/${BENCH_COMMENT_ID}" \
|
||||
-d "$(jq -nc --arg body "$body" '{body: $body}')" \
|
||||
> /dev/null 2>&1 || true
|
||||
}
|
||||
|
||||
# Track compressed bytes flowing through the pipe
|
||||
DL_BYTES_FILE=$(mktemp)
|
||||
echo 0 > "$DL_BYTES_FILE"
|
||||
|
||||
# Start progress reporter in background
|
||||
(
|
||||
while true; do
|
||||
sleep 10
|
||||
CURRENT=$(cat "$DL_BYTES_FILE" 2>/dev/null || echo 0)
|
||||
if [ "$TOTAL_BYTES" -gt 0 ]; then
|
||||
PCT=$(( CURRENT * 100 / TOTAL_BYTES ))
|
||||
[ "$PCT" -gt 100 ] && PCT=100
|
||||
echo "Snapshot download: $(numfmt --to=iec "$CURRENT") / $(numfmt --to=iec "$TOTAL_BYTES") (${PCT}%)"
|
||||
update_comment "$PCT"
|
||||
fi
|
||||
done
|
||||
) &
|
||||
PROGRESS_PID=$!
|
||||
trap 'kill $PROGRESS_PID 2>/dev/null || true; rm -f "$DL_BYTES_FILE"' EXIT
|
||||
|
||||
# Download and extract; python byte counter tracks compressed bytes received
|
||||
mc cat "$BUCKET" | python3 -c "
|
||||
import sys
|
||||
count = 0
|
||||
while True:
|
||||
data = sys.stdin.buffer.read(1048576)
|
||||
if not data:
|
||||
break
|
||||
count += len(data)
|
||||
sys.stdout.buffer.write(data)
|
||||
with open('$DL_BYTES_FILE', 'w') as f:
|
||||
f.write(str(count))
|
||||
" | pzstd -d -p 6 | sudo tar -xf - -C "$DATADIR"
|
||||
|
||||
# Stop progress reporter
|
||||
kill $PROGRESS_PID 2>/dev/null || true
|
||||
wait $PROGRESS_PID 2>/dev/null || true
|
||||
|
||||
update_comment "100"
|
||||
echo "Snapshot download complete"
|
||||
|
||||
# Sync the new snapshot as the schelk baseline
|
||||
sync
|
||||
sudo schelk recover -y
|
||||
|
||||
# Save ETag marker
|
||||
echo "$REMOTE_ETAG" > "$ETAG_FILE"
|
||||
echo "Snapshot synced to schelk (ETag: ${REMOTE_ETAG})"
|
||||
36
.github/workflows/bench.yml
vendored
36
.github/workflows/bench.yml
vendored
@@ -417,7 +417,7 @@ jobs:
|
||||
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
|
||||
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
|
||||
missing=()
|
||||
for cmd in mc schelk cpupower taskset stdbuf python3 curl make uv; do
|
||||
for cmd in mc schelk cpupower taskset stdbuf python3 curl make uv pzstd jq; do
|
||||
command -v "$cmd" &>/dev/null || missing+=("$cmd")
|
||||
done
|
||||
if [ ${#missing[@]} -gt 0 ]; then
|
||||
@@ -474,6 +474,23 @@ jobs:
|
||||
echo "feature-ref=$FEATURE_REF" >> "$GITHUB_OUTPUT"
|
||||
echo "feature-name=$FEATURE_NAME" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check if snapshot needs update
|
||||
id: snapshot-check
|
||||
run: |
|
||||
if .github/scripts/bench-reth-snapshot.sh --check; then
|
||||
echo "needed=false" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "needed=true" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Update status (snapshot needed)
|
||||
if: env.BENCH_COMMENT_ID && steps.snapshot-check.outputs.needed == 'true'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const s = require('./.github/scripts/bench-update-status.js');
|
||||
await s({github, context, status: 'Building binaries & downloading snapshot...'});
|
||||
|
||||
- name: Prepare source dirs
|
||||
run: |
|
||||
BASELINE_REF="${{ steps.refs.outputs.baseline-ref }}"
|
||||
@@ -485,8 +502,12 @@ jobs:
|
||||
git -C ../reth-baseline checkout "$BASELINE_REF"
|
||||
ln -sfn "$(pwd)" ../reth-feature
|
||||
|
||||
- name: Build baseline and feature binaries in parallel
|
||||
- name: Build binaries and download snapshot in parallel
|
||||
id: build
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
BENCH_REPO: ${{ github.repository }}
|
||||
SNAPSHOT_NEEDED: ${{ steps.snapshot-check.outputs.needed }}
|
||||
run: |
|
||||
BASELINE_DIR="$(cd ../reth-baseline && pwd)"
|
||||
FEATURE_DIR="$(cd ../reth-feature && pwd)"
|
||||
@@ -496,11 +517,18 @@ jobs:
|
||||
.github/scripts/bench-reth-build.sh feature "${FEATURE_DIR}" "${{ steps.refs.outputs.feature-ref }}" &
|
||||
PID_FEATURE=$!
|
||||
|
||||
PID_SNAPSHOT=
|
||||
if [ "$SNAPSHOT_NEEDED" = "true" ]; then
|
||||
.github/scripts/bench-reth-snapshot.sh &
|
||||
PID_SNAPSHOT=$!
|
||||
fi
|
||||
|
||||
FAIL=0
|
||||
wait $PID_BASELINE || FAIL=1
|
||||
wait $PID_FEATURE || FAIL=1
|
||||
[ -n "$PID_SNAPSHOT" ] && { wait $PID_SNAPSHOT || FAIL=1; }
|
||||
if [ $FAIL -ne 0 ]; then
|
||||
echo "::error::One or both builds failed"
|
||||
echo "::error::One or more parallel tasks failed (builds / snapshot download)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -710,7 +738,7 @@ jobs:
|
||||
with:
|
||||
script: |
|
||||
const steps_status = [
|
||||
['building binaries', '${{ steps.build.outcome }}'],
|
||||
['building binaries${{ steps.snapshot-check.outputs.needed == 'true' && ' & downloading snapshot' || '' }}', '${{ steps.build.outcome }}'],
|
||||
['running baseline benchmark', '${{ steps.run-baseline.outcome }}'],
|
||||
['running feature benchmark', '${{ steps.run-feature.outcome }}'],
|
||||
];
|
||||
|
||||
Reference in New Issue
Block a user