chore: improve benchmark (#8664)

**Motivation**

- it takes too much time to run the benchmarks, and a lot of them do not make
sense
- seeing OOM with NodeJS 24, see
https://github.com/ChainSafe/lodestar/pull/8645#issuecomment-3601327203

**Description**

- remove benchmarks for flows that are not used in prod
- remove the "minMs" option from some tests where it makes them take a lot of time
- remove tests that do not reflect the bottlenecks of lodestar's
performance as of Dec 2025
- remove tests that are not part of lodestar code. They are only meaningful in
the scope of the PR that introduced them

This is based on the long-running tests I found in
https://github.com/ChainSafe/lodestar/actions/runs/19874295397/job/56957698411

```
packages/beacon-node/test/perf/chain/validation/attestation.test.ts
  validate gossip attestation
    ✔ batch validate gossip attestation - vc 640000 - chunk 32            8931.657 ops/s    111.9613 us/op   x0.918       7814 runs   30.0 s
    ✔ batch validate gossip attestation - vc 640000 - chunk 64            9972.473 ops/s    100.2760 us/op   x0.926       4321 runs   30.1 s
    ✔ batch validate gossip attestation - vc 640000 - chunk 128           10569.62 ops/s    94.61075 us/op   x0.921       2268 runs   30.0 s
    ✔ batch validate gossip attestation - vc 640000 - chunk 256           10069.74 ops/s    99.30746 us/op   x0.901       1154 runs   30.1 s

packages/fork-choice/test/perf/protoArray/computeDeltas.test.ts
  computeDeltas
    ✔ computeDeltas 1400000 validators 0% inactive                        73.51301 ops/s    13.60303 ms/op   x0.986        539 runs   10.0 s
    ✔ computeDeltas 1400000 validators 10% inactive                       78.91095 ops/s    12.67251 ms/op   x0.989        556 runs   10.0 s
    ✔ computeDeltas 1400000 validators 20% inactive                       86.73608 ops/s    11.52923 ms/op   x1.001        598 runs   10.0 s
    ✔ computeDeltas 1400000 validators 50% inactive                       114.8443 ops/s    8.707439 ms/op   x0.990        799 runs   10.0 s
    ✔ computeDeltas 2100000 validators 0% inactive                        48.69939 ops/s    20.53414 ms/op   x0.996        371 runs   10.0 s
    ✔ computeDeltas 2100000 validators 10% inactive                       53.13929 ops/s    18.81847 ms/op   x1.000        371 runs   10.0 s
    ✔ computeDeltas 2100000 validators 20% inactive                       60.11017 ops/s    16.63612 ms/op   x0.978        418 runs   10.0 s
    ✔ computeDeltas 2100000 validators 50% inactive                       79.46802 ops/s    12.58368 ms/op   x0.967        552 runs   10.0 s

packages/state-transition/test/perf/util/loadState/findModifiedValidators.test.ts
  find modified validators by different ways
    serialize validators then findModifiedValidators
      ✔ findModifiedValidators - 10000 modified validators                  1.382729 ops/s    723.2076 ms/op   x0.993         10 runs   9.21 s
      ✔ findModifiedValidators - 1000 modified validators                   1.298120 ops/s    770.3450 ms/op   x1.152         10 runs   8.68 s
      ✔ findModifiedValidators - 100 modified validators                    3.535168 ops/s    282.8720 ms/op   x1.329         10 runs   3.85 s
      ✔ findModifiedValidators - 10 modified validators                     4.648368 ops/s    215.1293 ms/op   x1.548         10 runs   3.13 s
      ✔ findModifiedValidators - 1 modified validators                      5.296754 ops/s    188.7949 ms/op   x1.187         10 runs   3.10 s
      ✔ findModifiedValidators - no difference                              3.873496 ops/s    258.1647 ms/op   x1.236         12 runs   3.88 s
    deserialize validators then compare validator ViewDUs
      ✔ compare ViewDUs                                                    0.1524038 ops/s    6.561514  s/op   x1.077          9 runs   65.7 s
    serialize each validator then compare Uin8Array
      ✔ compare each validator Uint8Array                                  0.8007866 ops/s    1.248772  s/op   x0.830         10 runs   13.7 s
    compare validator ViewDU to Uint8Array
      ✔ compare ViewDU to Uint8Array                                       0.9549799 ops/s    1.047143  s/op   x0.999         10 runs   11.5 s

packages/state-transition/test/perf/util/loadState/loadState.test.ts
  loadState
    ✔ migrate state 1000000 validators, 24 modified, 0 new               0.9790753 ops/s    1.021372  s/op   x1.147         57 runs   60.1 s
    ✔ migrate state 1000000 validators, 1700 modified, 1000 new          0.7290797 ops/s    1.371592  s/op   x0.942         43 runs   61.1 s
    ✔ migrate state 1000000 validators, 3400 modified, 2000 new          0.6307866 ops/s    1.585322  s/op   x0.883         37 runs   60.9 s
    ✔ migrate state 1500000 validators, 24 modified, 0 new               0.9393088 ops/s    1.064613  s/op   x0.911         55 runs   60.5 s
    ✔ migrate state 1500000 validators, 1700 modified, 1000 new          0.8235204 ops/s    1.214299  s/op   x0.785         48 runs   60.2 s
    ✔ migrate state 1500000 validators, 3400 modified, 2000 new          0.6997867 ops/s    1.429007  s/op   x0.720         41 runs   60.7 s


  ✔ naive computeProposerIndex 100000 validators                        21.29210 ops/s    46.96578 ms/op   x0.591         10 runs   51.8 s

  getNextSyncCommitteeIndices electra
    ✔ naiveGetNextSyncCommitteeIndices 1000 validators                   0.1319639 ops/s    7.577831  s/op   x0.675          8 runs   66.8 s
    ✔ getNextSyncCommitteeIndices 1000 validators                         9.444554 ops/s    105.8811 ms/op   x0.753         10 runs   1.60 s
    ✔ naiveGetNextSyncCommitteeIndices 10000 validators                  0.1280431 ops/s    7.809868  s/op   x0.766          7 runs   61.8 s
    ✔ getNextSyncCommitteeIndices 10000 validators                        9.244910 ops/s    108.1676 ms/op   x0.880         10 runs   1.62 s
    ✔ naiveGetNextSyncCommitteeIndices 100000 validators                 0.1295493 ops/s    7.719071  s/op   x0.814          7 runs   61.9 s
    ✔ getNextSyncCommitteeIndices 100000 validators                       9.279165 ops/s    107.7683 ms/op   x0.751         10 runs   1.62 s

  computeShuffledIndex
    ✔ naive computeShuffledIndex 100000 validators                      0.04376956 ops/s    22.84693  s/op   x0.719          2 runs   67.8 s
    ✔ cached computeShuffledIndex 100000 validators                       1.790556 ops/s    558.4858 ms/op   x0.973         10 runs   6.16 s
    ✔ naive computeShuffledIndex 2000000 validators                    0.002243157 ops/s    445.8003  s/op   x0.922          1 runs    931 s
    ✔ cached computeShuffledIndex 2000000 validators                    0.02947726 ops/s    33.92445  s/op   x0.810          1 runs   71.3 s

packages/state-transition/test/perf/util/signingRoot.test.ts
  computeSigningRoot
    ✔ computeSigningRoot for AttestationData                              51551.61 ops/s    19.39804 us/op   x0.905        491 runs   10.0 s
    ✔ hash AttestationData serialized data then Buffer.toString(base64    639269.7 ops/s    1.564285 us/op   x0.977       5818 runs   10.0 s
    ✔ toHexString serialized data                                         886487.9 ops/s    1.128047 us/op   x0.926       8417 runs   10.0 s
    ✔ Buffer.toString(base64)                                              6071166 ops/s    164.7130 ns/op   x0.974      50685 runs   10.1 s
```

---------

Co-authored-by: Tuyen Nguyen <twoeths@users.noreply.github.com>
This commit is contained in:
twoeths
2025-12-03 21:57:51 +07:00
committed by GitHub
parent 8475d71f09
commit 1ad9c40143
10 changed files with 53 additions and 141 deletions

View File

@@ -1,5 +1,5 @@
import assert from "node:assert";
import {bench, describe, setBenchOpts} from "@chainsafe/benchmark";
import {bench, describe} from "@chainsafe/benchmark";
import {ssz} from "@lodestar/types";
import {generateTestCachedBeaconStateOnlyValidators} from "../../../../../state-transition/test/perf/util.js";
import {validateGossipAttestationsSameAttData} from "../../../../src/chain/validation/index.js";
@@ -7,10 +7,6 @@ import {getAttDataFromAttestationSerialized} from "../../../../src/util/sszBytes
import {getAttestationValidData} from "../../../utils/validationData/attestation.js";
describe("validate gossip attestation", () => {
setBenchOpts({
minMs: 30_000,
});
const vc = 640_000;
const stateSlot = 100;
const state = generateTestCachedBeaconStateOnlyValidators({vc, slot: stateSlot});

View File

@@ -1,6 +1,9 @@
import {beforeAll, bench, describe} from "@chainsafe/benchmark";
describe("bytes utils", () => {
/**
* Enable this if you want to compare performance of Buffer vs Uint8Array operations. Not lodestar code so skipped by default.
*/
describe.skip("bytes utils", () => {
const roots: Uint8Array[] = [];
let buffers: Buffer[] = [];
const count = 32;

View File

@@ -1,6 +1,10 @@
import {bench, describe} from "@chainsafe/benchmark";
describe("dataview", () => {
/**
* Benchmark to compare DataView.getUint32 vs manual uint32 creation from Uint8Array.
* Not lodestar code so skipped by default.
*/
describe.skip("dataview", () => {
const data = Uint8Array.from([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]);
bench({

View File

@@ -1,6 +1,29 @@
import {bench, describe, setBenchOpts} from "@chainsafe/benchmark";
describe("transfer bytes", () => {
/**
* This shows how fast the transfer of bytes between workers is compared to a simple copy.
* Disabled by default because it's not lodestar code.
* transfer bytes
✔ transfer serialized Status (84 B) 232504.1 ops/s 4.301000 us/op x1.968 39313 runs 0.320 s
✔ copy serialized Status (84 B) 413736.0 ops/s 2.417000 us/op x2.029 79160 runs 0.344 s
✔ transfer serialized SignedVoluntaryExit (112 B) 233644.9 ops/s 4.280000 us/op x1.912 65063 runs 0.535 s
✔ copy serialized SignedVoluntaryExit (112 B) 434593.7 ops/s 2.301000 us/op x1.895 105903 runs 0.453 s
✔ transfer serialized ProposerSlashing (416 B) 243013.4 ops/s 4.115000 us/op x1.800 38143 runs 0.321 s
✔ copy serialized ProposerSlashing (416 B) 360360.4 ops/s 2.775000 us/op x2.202 85781 runs 0.444 s
✔ transfer serialized Attestation (485 B) 238948.6 ops/s 4.185000 us/op x1.809 38342 runs 0.320 s
✔ copy serialized Attestation (485 B) 438020.1 ops/s 2.283000 us/op x1.777 97506 runs 0.459 s
✔ transfer serialized AttesterSlashing (33232 B) 228937.7 ops/s 4.368000 us/op x1.734 28449 runs 0.419 s
✔ copy serialized AttesterSlashing (33232 B) 129148.9 ops/s 7.743000 us/op x1.797 21674 runs 0.310 s
✔ transfer serialized Small SignedBeaconBlock (128000 B) 183553.6 ops/s 5.448000 us/op x1.328 10288 runs 0.408 s
✔ copy serialized Small SignedBeaconBlock (128000 B) 11670.25 ops/s 85.68800 us/op x6.069 2868 runs 0.405 s
✔ transfer serialized Avg SignedBeaconBlock (200000 B) 199561.0 ops/s 5.011000 us/op x1.172 12879 runs 0.727 s
✔ copy serialized Avg SignedBeaconBlock (200000 B) 12585.90 ops/s 79.45400 us/op x4.288 2916 runs 0.408 s
✔ transfer serialized BlobsSidecar (524380 B) 189501.6 ops/s 5.277000 us/op x1.025 1896 runs 0.474 s
✔ copy serialized BlobsSidecar (524380 B) 5294.703 ops/s 188.8680 us/op x1.702 1268 runs 0.546 s
✔ transfer serialized Big SignedBeaconBlock (1000000 B) 167084.4 ops/s 5.985000 us/op x1.134 1443 runs 0.514 s
✔ copy serialized Big SignedBeaconBlock (1000000 B) 6337.457 ops/s 157.7920 us/op x1.246 1200 runs 0.521 s
*/
describe.skip("transfer bytes", () => {
const sizes = [
{size: 84, name: "Status"},
{size: 112, name: "SignedVoluntaryExit"},

View File

@@ -1,4 +1,4 @@
import {beforeAll, bench, describe, setBenchOpts} from "@chainsafe/benchmark";
import {beforeAll, bench, describe} from "@chainsafe/benchmark";
import {EffectiveBalanceIncrements, getEffectiveBalanceIncrementsZeroed} from "@lodestar/state-transition";
import {computeDeltas} from "../../../src/protoArray/computeDeltas.js";
import {NULL_VOTE_INDEX} from "../../../src/protoArray/interface.js";
@@ -26,11 +26,6 @@ describe("computeDeltas", () => {
2 * 60 * 1000
);
setBenchOpts({
minMs: 10 * 1000,
maxMs: 10 * 1000,
});
for (const inainactiveValidatorsPercentage of inactiveValidatorsPercentages) {
if (inainactiveValidatorsPercentage < 0 || inainactiveValidatorsPercentage > 1) {
throw new Error("inactiveValidatorsPercentage must be between 0 and 1");

View File

@@ -21,8 +21,9 @@ import {generateValidators} from "../../utils/validator.js";
* - with 123687377 bytes, Buffer.compare() is 38x faster
* ✔ byteArrayEquals 123687377 3.077884 ops/s 324.8985 ms/op - 1 runs 64.5 s
* ✔ Buffer.compare 123687377 114.7834 ops/s 8.712061 ms/op - 13 runs 12.1 s
*
*/
describe("compare Uint8Array using byteArrayEquals() vs Buffer.compare()", () => {
describe.skip("compare Uint8Array using byteArrayEquals() vs Buffer.compare()", () => {
const numValidator = 1_000_000;
const validators = generateValidators(numValidator);
const state = generateState({validators: validators});

View File

@@ -71,7 +71,7 @@ describe("find modified validators by different ways", () => {
return clonedState;
},
fn: (clonedState) => {
const validatorsBytes = Uint8Array.from(stateBytes.subarray(validatorsRange.start, validatorsRange.end));
const validatorsBytes = stateBytes.subarray(validatorsRange.start, validatorsRange.end);
const validatorsBytes2 = clonedState.validators.serialize();
const modifiedValidators: number[] = [];
findModifiedValidators(validatorsBytes, validatorsBytes2, modifiedValidators);
@@ -84,7 +84,7 @@ describe("find modified validators by different ways", () => {
}
});
describe("deserialize validators then compare validator ViewDUs", () => {
describe.skip("deserialize validators then compare validator ViewDUs", () => {
const validatorsBytes = stateBytes.subarray(validatorsRange.start, validatorsRange.end);
bench("compare ViewDUs", () => {
const numValidator = state.validators.length;
@@ -97,7 +97,7 @@ describe("find modified validators by different ways", () => {
});
});
describe("serialize each validator then compare Uin8Array", () => {
describe.skip("serialize each validator then compare Uin8Array", () => {
const validators = state.validators.getAllReadonly();
bench("compare each validator Uint8Array", () => {
for (let i = 0; i < state.validators.length; i++) {
@@ -117,7 +117,7 @@ describe("find modified validators by different ways", () => {
});
});
describe("compare validator ViewDU to Uint8Array", () => {
describe.skip("compare validator ViewDU to Uint8Array", () => {
bench("compare ViewDU to Uint8Array", () => {
const numValidator = state.validators.length;
for (let i = 0; i < numValidator; i++) {

View File

@@ -1,4 +1,4 @@
import {bench, describe, setBenchOpts} from "@chainsafe/benchmark";
import {bench, describe} from "@chainsafe/benchmark";
import {PublicKey} from "@chainsafe/blst";
import {PubkeyIndexMap} from "@chainsafe/pubkey-index-map";
import {Index2PubkeyCache} from "../../../../src/cache/pubkeyCache.js";
@@ -9,33 +9,12 @@ import {generatePerfTestCachedStateAltair} from "../../util.js";
/**
* This benchmark shows a stable performance from 2s to 3s on a Mac M1. And it does not really depend on the seed validators,
* only the modified and new validators
*
* - On mainnet, as of Oct 2023, there are ~1M validators
*
* ✔ migrate state 1000000 validators, 24 modified, 0 new 0.4475463 ops/s 2.234406 s/op - 3 runs 62.1 s
* ✔ migrate state 1000000 validators, 1700 modified, 1000 new 0.3663298 ops/s 2.729781 s/op - 21 runs 62.1 s
* ✔ migrate state 1000000 validators, 3400 modified, 2000 new 0.3413125 ops/s 2.929866 s/op - 19 runs 60.9 s
* - On holesky, there are ~1.5M validators
* ✔ migrate state 1500000 validators, 24 modified, 0 new 0.4278145 ops/s 2.337461 s/op - 24 runs 61.1 s
* ✔ migrate state 1500000 validators, 1700 modified, 1000 new 0.3642085 ops/s 2.745680 s/op - 20 runs 60.1 s
* ✔ migrate state 1500000 validators, 3400 modified, 2000 new 0.3344296 ops/s 2.990166 s/op - 19 runs 62.4 s
*/
describe("loadState", () => {
setBenchOpts({
minMs: 60_000,
});
const testCases: {seedValidators: number; numModifiedValidators: number; numNewValidators: number}[] = [
// this 1_000_000 is similar to mainnet state as of Oct 2023
// similar to migrating from state 7335296 to state 7335360 on mainnet, this is 2 epochs difference
{seedValidators: 1_000_000, numModifiedValidators: 24, numNewValidators: 0},
{seedValidators: 1_000_000, numModifiedValidators: 1700, numNewValidators: 1000},
// similar to migrating from state 7327776 to state 7335360 on mainnet, this is 237 epochs difference ~ 1 day
{seedValidators: 1_000_000, numModifiedValidators: 3400, numNewValidators: 2000},
// same tests on holesky with 1_500_000 validators
{seedValidators: 1_500_000, numModifiedValidators: 24, numNewValidators: 0},
{seedValidators: 1_500_000, numModifiedValidators: 1700, numNewValidators: 1000},
// enable these tests if you want to see performance with different seed validators
// {seedValidators: 1_500_000, numModifiedValidators: 24, numNewValidators: 0},
// {seedValidators: 1_500_000, numModifiedValidators: 1700, numNewValidators: 1000},
{seedValidators: 1_500_000, numModifiedValidators: 3400, numNewValidators: 2000},
];
for (const {seedValidators, numModifiedValidators, numNewValidators} of testCases) {

View File

@@ -26,7 +26,8 @@ describe("computeProposerIndex", () => {
const activeIndices = new Uint32Array(Array.from({length: vc}, (_, i) => i));
const runsFactor = 100;
bench({
// enable this if you want to see the naive version performance
bench.skip({
id: `naive computeProposerIndex ${vc} validators`,
fn: () => {
for (let i = 0; i < runsFactor; i++) {
@@ -57,7 +58,8 @@ describe("getNextSyncCommitteeIndices electra", () => {
effectiveBalanceIncrements[i] = 32;
}
bench({
// enable this if you want to see the naive version performance
bench.skip({
id: `naiveGetNextSyncCommitteeIndices ${vc} validators`,
fn: () => {
naiveGetNextSyncCommitteeIndices(ForkSeq.electra, state, activeIndices, effectiveBalanceIncrements);
@@ -77,7 +79,8 @@ describe("computeShuffledIndex", () => {
const seed = new Uint8Array(Array.from({length: 32}, (_, i) => i));
for (const vc of [100_000, 2_000_000]) {
bench({
// enable this if you want to see the naive version performance
bench.skip({
id: `naive computeShuffledIndex ${vc} validators`,
fn: () => {
for (let i = 0; i < vc; i++) {
@@ -87,8 +90,8 @@ describe("computeShuffledIndex", () => {
});
const shuffledIndexFn = getComputeShuffledIndexFn(vc, seed);
bench({
// getComputeShuffledIndexFn() is also not in prod anymore so no need to track it
bench.skip({
id: `cached computeShuffledIndex ${vc} validators`,
fn: () => {
for (let i = 0; i < vc; i++) {

View File

@@ -1,92 +0,0 @@
import {digest} from "@chainsafe/as-sha256";
import {bench, describe, setBenchOpts} from "@chainsafe/benchmark";
import {fromHexString, toHexString} from "@chainsafe/ssz";
import {phase0, ssz} from "@lodestar/types";
import {computeSigningRoot} from "../../../src/util/signingRoot.js";
/**
* As of Apr 2023, when we apply new gossip queues we process all gossip attestations and computeSigningRoot may take up to 6% of cpu.
* The below benchmark results show that if we use Buffer.toString(base64) against serialized attestation data, it is still way cheaper
* than computeSigningRoot.
* Based on that we can cache attestation data as string in order to avoid recomputing signing root when validating gossip attestations.
* computeSigningRoot
✔ computeSigningRoot for AttestationData 94788.17 ops/s 10.54984 us/op - 901 runs 10.0 s
✔ hash AttestationData serialized data then Buffer.toString(base64 509425.9 ops/s 1.962994 us/op - 4856 runs 10.0 s
✔ toHexString serialized data 727592.3 ops/s 1.374396 us/op - 6916 runs 10.0 s
✔ Buffer.toString(base64) 2570800 ops/s 388.9840 ns/op - 24628 runs 10.1 s
*/
describe("computeSigningRoot", () => {
  // Each benchmark body repeats its operation this many times; the same value is passed as runsFactor.
  const iterations = 1000;

  // Benchmark option: minMs of 10 seconds for every bench in this suite.
  setBenchOpts({minMs: 10_000});

  const attDataType = ssz.phase0.AttestationData;

  // Fixed AttestationData used as the input for every benchmark below.
  const beaconBlockRoot = fromHexString("0x94cef26d543b20568a4bbb77ae2ba203826912065348613a437a9106142aff85");
  const sourceRoot = fromHexString("0x1a955a91af4ee915c1f267f0026668c58237c1a23bd6c106ef05459741a9171c");
  const targetRoot = fromHexString("0x48db1209cd969a1a74eb19d1c5e24021d3a4ac45b8b1b2c1b0e8b0c1b0e8b0c1");
  const seedObject: phase0.AttestationData = {
    slot: 6118259,
    index: 46,
    beaconBlockRoot,
    source: {epoch: 191194, root: sourceRoot},
    target: {epoch: 191195, root: targetRoot},
  };

  // All-zero 32-byte domain and the serialized form of the seed object.
  const domain = new Uint8Array(32);
  const bytes = attDataType.serialize(seedObject);

  // Baseline: compute the signing root of a freshly cloned AttestationData.
  bench({
    id: "computeSigningRoot for AttestationData",
    runsFactor: iterations,
    fn: () => {
      for (let n = 0; n < iterations; n++) {
        const attData = clone(seedObject);
        computeSigningRoot(attDataType, attData, domain);
      }
    },
  });

  // The remaining benchmarks also call clone(seedObject) and discard the result,
  // so every case pays the same cloning cost as the baseline.

  // Hash the serialized bytes and encode the digest as base64.
  bench({
    id: "hash AttestationData serialized data then Buffer.toString(base64)",
    runsFactor: iterations,
    fn: () => {
      for (let n = 0; n < iterations; n++) {
        clone(seedObject);
        const hashed = digest(bytes);
        Buffer.from(hashed).toString("base64");
      }
    },
  });

  // Hex-encode the serialized bytes.
  bench({
    id: "toHexString serialized data",
    runsFactor: iterations,
    fn: () => {
      for (let n = 0; n < iterations; n++) {
        clone(seedObject);
        toHexString(bytes);
      }
    },
  });

  // Base64-encode the serialized bytes without hashing.
  bench({
    id: "Buffer.toString(base64)",
    runsFactor: iterations,
    fn: () => {
      for (let n = 0; n < iterations; n++) {
        clone(seedObject);
        Buffer.from(bytes).toString("base64");
      }
    },
  });
});
/**
 * Build a new AttestationData object carrying the same five fields as the input.
 *
 * The copy is shallow: `beaconBlockRoot`, `source` and `target` are the same
 * references as on `sszObject`, only the outer object is new.
 *
 * @param sszObject - the AttestationData to copy
 * @returns a new object with `slot`, `index`, `beaconBlockRoot`, `source` and `target`
 */
function clone(sszObject: phase0.AttestationData): phase0.AttestationData {
  const {slot, index, beaconBlockRoot, source, target} = sszObject;
  return {slot, index, beaconBlockRoot, source, target};
}