ci(bench): use ABBA run order to reduce variance (#22321)

This commit is contained in:
Alexey Shekhirin
2026-02-18 15:33:31 +00:00
committed by GitHub
parent 9251997c1f
commit ee19320ee8
3 changed files with 42 additions and 11 deletions

View File

@@ -188,30 +188,56 @@ def plot_gas_vs_latency(
plt.close(fig)
def merge_csvs(paths: list[str]) -> list[dict]:
"""Parse and merge multiple CSVs, averaging values for duplicate blocks."""
by_block: dict[int, list[dict]] = {}
for path in paths:
for row in parse_combined_csv(path):
by_block.setdefault(row["block_number"], []).append(row)
merged = []
for bn in sorted(by_block):
rows = by_block[bn]
if len(rows) == 1:
merged.append(rows[0])
else:
avg = {"block_number": bn}
for key in ("gas_used", "new_payload_latency_us"):
avg[key] = int(sum(r[key] for r in rows) / len(rows))
for key in ("persistence_wait_us", "execution_cache_wait_us", "sparse_trie_wait_us"):
vals = [r[key] for r in rows if r[key] is not None]
avg[key] = int(sum(vals) / len(vals)) if vals else None
merged.append(avg)
return merged
def main():
parser = argparse.ArgumentParser(description="Generate benchmark charts")
parser.add_argument("combined_csv", help="Path to combined_latency.csv (feature)")
parser.add_argument(
"--feature", nargs="+", required=True,
help="Path(s) to feature combined_latency.csv",
)
parser.add_argument(
"--output-dir", required=True, help="Output directory for PNG charts"
)
parser.add_argument(
"--baseline", help="Path to baseline combined_latency.csv"
"--baseline", nargs="+", help="Path(s) to baseline combined_latency.csv"
)
parser.add_argument("--baseline-name", default="baseline", help="Label for baseline")
parser.add_argument("--feature-name", "--branch-name", default="feature", help="Label for feature")
args = parser.parse_args()
feature = parse_combined_csv(args.combined_csv)
feature = merge_csvs(args.feature)
if not feature:
print("No results found in combined CSV", file=sys.stderr)
print("No results found in feature CSV(s)", file=sys.stderr)
sys.exit(1)
baseline = None
if args.baseline:
baseline = parse_combined_csv(args.baseline)
baseline = merge_csvs(args.baseline)
if not baseline:
print(
"Warning: no results in baseline CSV, skipping comparison",
"Warning: no results in baseline CSV(s), skipping comparison",
file=sys.stderr,
)
baseline = None

View File

@@ -183,11 +183,13 @@ def compute_paired_stats(
all_pairs = []
all_lat_diffs = []
all_mgas_diffs = []
blocks_per_pair = []
for baseline, feature in zip(baseline_runs, feature_runs):
pairs, lat_diffs, mgas_diffs = _paired_data(baseline, feature)
all_pairs.extend(pairs)
all_lat_diffs.extend(lat_diffs)
all_mgas_diffs.extend(mgas_diffs)
blocks_per_pair.append(len(pairs))
if not all_lat_diffs:
return {}
@@ -237,6 +239,7 @@ def compute_paired_stats(
"p99_ci_ms": (p99_boot[hi] - p99_boot[lo]) / 2,
"mean_mgas_diff": mean_mgas_diff,
"mgas_ci": mgas_ci,
"blocks": max(blocks_per_pair),
}
@@ -298,7 +301,7 @@ def generate_comparison_table(
feature_sha: str,
) -> str:
"""Generate a markdown comparison table between baseline and feature."""
n = paired["n"]
n = paired["blocks"]
def pct(base: float, feat: float) -> float:
return (feat - base) / base * 100.0 if base > 0 else 0.0

View File

@@ -476,12 +476,13 @@ jobs:
- name: Prepare source dirs
run: |
BASELINE_REF="${{ steps.refs.outputs.baseline-ref }}"
if [ -d ../reth-baseline ]; then
git -C ../reth-baseline fetch origin
git -C ../reth-baseline fetch origin "$BASELINE_REF"
else
git clone . ../reth-baseline
fi
git -C ../reth-baseline checkout "${{ steps.refs.outputs.baseline-ref }}"
git -C ../reth-baseline checkout "$BASELINE_REF"
ln -sfn "$(pwd)" ../reth-feature
- name: Build baseline and feature binaries in parallel
@@ -611,8 +612,9 @@ jobs:
BASELINE_NAME: ${{ steps.refs.outputs.baseline-name }}
FEATURE_NAME: ${{ steps.refs.outputs.feature-name }}
run: |
CHART_ARGS="/tmp/bench-results-feature/combined_latency.csv --output-dir /tmp/bench-charts"
CHART_ARGS="$CHART_ARGS --baseline /tmp/bench-results-baseline/combined_latency.csv"
CHART_ARGS="--output-dir /tmp/bench-charts"
CHART_ARGS="$CHART_ARGS --feature /tmp/bench-results-feature-1/combined_latency.csv /tmp/bench-results-feature-2/combined_latency.csv"
CHART_ARGS="$CHART_ARGS --baseline /tmp/bench-results-baseline-1/combined_latency.csv /tmp/bench-results-baseline-2/combined_latency.csv"
CHART_ARGS="$CHART_ARGS --baseline-name ${BASELINE_NAME}"
CHART_ARGS="$CHART_ARGS --feature-name ${FEATURE_NAME}"
# shellcheck disable=SC2086