feat: track performance of processFinalizedCheckpoint() (#8530)

**Motivation**

- investigate and maintain the performance of
`processFinalizedCheckpoint()`
- this is part of #8526

**Description**

- track duration of `processFinalizedCheckpoint()` by tasks, the result
on a hoodi node, it shows that `FrequencyStateArchiveStrategy` takes the
most time

<img width="941" height="297" alt="Screenshot 2025-10-14 at 13 45 38"
src="https://github.com/user-attachments/assets/ef440399-538b-4a4a-a63c-e775745b25e6"
/>


- track different steps of `FrequencyStateArchiveStrategy`, the result
shows that the mainthread is blocked by different db queries cc
@wemeetagain

<img width="1291" height="657" alt="Screenshot 2025-10-14 at 13 46 36"
src="https://github.com/user-attachments/assets/3b19f008-c7d8-49a4-9dc5-e68b1a5ba2a5"
/>

part of #8526

---------

Co-authored-by: Tuyen Nguyen <twoeths@users.noreply.github.com>
This commit is contained in:
twoeths
2025-10-14 20:28:12 +07:00
committed by GitHub
parent c7f3e8d129
commit b739d2d697
3 changed files with 71 additions and 4 deletions

View File

@@ -25,6 +25,16 @@ type ArchiveStoreModules = {
type ArchiveStoreInitOpts = ArchiveStoreOpts & {dbName: string; anchorState: {finalizedCheckpoint: Checkpoint}};
export enum ArchiveStoreTask {
ArchiveBlocks = "archive_blocks",
PruneHistory = "prune_history",
OnFinalizedCheckpoint = "on_finalized_checkpoint",
MaybeArchiveState = "maybe_archive_state",
RegenPruneOnFinalized = "regen_prune_on_finalized",
ForkchoicePrune = "forkchoice_prune",
UpdateBackfillRange = "update_backfill_range",
}
/**
* Used for running tasks that depends on some events or are executed
* periodically.
@@ -176,6 +186,8 @@ export class ArchiveStore {
try {
const finalizedEpoch = finalized.epoch;
this.logger.verbose("Start processing finalized checkpoint", {epoch: finalizedEpoch, rootHex: finalized.rootHex});
let timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
await archiveBlocks(
this.chain.config,
this.db,
@@ -188,7 +200,10 @@ export class ArchiveStore {
this.chain.opts.persistOrphanedBlocks,
this.chain.opts.persistOrphanedBlocksDir
);
timer?.({source: ArchiveStoreTask.ArchiveBlocks});
if (this.opts.pruneHistory) {
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
await pruneHistory(
this.chain.config,
this.db,
@@ -197,18 +212,30 @@ export class ArchiveStore {
finalizedEpoch,
this.chain.clock.currentEpoch
);
timer?.({source: ArchiveStoreTask.PruneHistory});
}
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
await this.statesArchiverStrategy.onFinalizedCheckpoint(finalized, this.metrics);
timer?.({source: ArchiveStoreTask.OnFinalizedCheckpoint});
// should be after ArchiveBlocksTask to handle restart cleanly
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
await this.statesArchiverStrategy.maybeArchiveState(finalized, this.metrics);
timer?.({source: ArchiveStoreTask.MaybeArchiveState});
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
this.chain.regen.pruneOnFinalized(finalizedEpoch);
timer?.({source: ArchiveStoreTask.RegenPruneOnFinalized});
// tasks rely on extended fork choice
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
const prunedBlocks = this.chain.forkChoice.prune(finalized.rootHex);
timer?.({source: ArchiveStoreTask.ForkchoicePrune});
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
await updateBackfillRange({chain: this.chain, db: this.db, logger: this.logger}, finalized);
timer?.({source: ArchiveStoreTask.UpdateBackfillRange});
this.logger.verbose("Finish processing finalized checkpoint", {
epoch: finalizedEpoch,

View File

@@ -17,6 +17,15 @@ import {StateArchiveStrategy, StatesArchiveOpts} from "../interface.js";
*/
export const PERSIST_TEMP_STATE_EVERY_EPOCHS = 32;
export enum FrequencyStateArchiveStep {
LoadLastStoredSlot = "load_last_stored_slot",
GetFinalizedState = "get_finalized_state",
// SerializeState is tracked via stateSerializeDuration metric
PersistState = "persist_state",
LoadStoredSlotsToDelete = "load_stored_slots_to_delete",
DeleteOldStates = "delete_old_states",
}
/**
* Archives finalized states from active bucket to archive bucket.
*
@@ -47,7 +56,10 @@ export class FrequencyStateArchiveStrategy implements StateArchiveStrategy {
* ```
*/
async maybeArchiveState(finalized: CheckpointWithHex, metrics?: Metrics | null): Promise<void> {
let timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
const lastStoredSlot = await this.db.stateArchive.lastKey();
timer?.({step: FrequencyStateArchiveStep.LoadLastStoredSlot});
const lastStoredEpoch = computeEpochAtSlot(lastStoredSlot ?? 0);
const {archiveStateEpochFrequency} = this.opts;
@@ -60,15 +72,19 @@ export class FrequencyStateArchiveStrategy implements StateArchiveStrategy {
(Math.floor(finalized.epoch / archiveStateEpochFrequency) - 1) * archiveStateEpochFrequency
);
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
const storedStateSlots = await this.db.stateArchive.keys({
lt: computeStartSlotAtEpoch(finalized.epoch),
gte: computeStartSlotAtEpoch(minEpoch),
});
timer?.({step: FrequencyStateArchiveStep.LoadStoredSlotsToDelete});
const statesSlotsToDelete = computeStateSlotsToDelete(storedStateSlots, archiveStateEpochFrequency);
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
if (statesSlotsToDelete.length > 0) {
await this.db.stateArchive.batchDelete(statesSlotsToDelete);
}
timer?.({step: FrequencyStateArchiveStep.DeleteOldStates});
// More logs to investigate the rss spike issue https://github.com/ChainSafe/lodestar/issues/5591
this.logger.verbose("Archived state completed", {
@@ -86,24 +102,31 @@ export class FrequencyStateArchiveStrategy implements StateArchiveStrategy {
*/
async archiveState(finalized: CheckpointWithHex, metrics?: Metrics | null): Promise<void> {
// starting from Mar 2024, the finalized state could be from disk or in memory
let timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
const finalizedStateOrBytes = await this.regen.getCheckpointStateOrBytes(finalized);
timer?.({step: FrequencyStateArchiveStep.GetFinalizedState});
const {rootHex} = finalized;
if (!finalizedStateOrBytes) {
throw Error(`No state in cache for finalized checkpoint state epoch #${finalized.epoch} root ${rootHex}`);
}
if (finalizedStateOrBytes instanceof Uint8Array) {
const slot = getStateSlotFromBytes(finalizedStateOrBytes);
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
await this.db.stateArchive.putBinary(slot, finalizedStateOrBytes);
timer?.({step: FrequencyStateArchiveStep.PersistState});
this.logger.verbose("Archived finalized state bytes", {epoch: finalized.epoch, slot, root: rootHex});
} else {
// serialize state using BufferPool if provided
const timer = metrics?.stateSerializeDuration.startTimer({source: AllocSource.ARCHIVE_STATE});
const sszTimer = metrics?.stateSerializeDuration.startTimer({source: AllocSource.ARCHIVE_STATE});
await serializeState(
finalizedStateOrBytes,
AllocSource.ARCHIVE_STATE,
(stateBytes) => {
timer?.();
return this.db.stateArchive.putBinary(finalizedStateOrBytes.slot, stateBytes);
async (stateBytes) => {
sszTimer?.();
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
await this.db.stateArchive.putBinary(finalizedStateOrBytes.slot, stateBytes);
timer?.({step: FrequencyStateArchiveStep.PersistState});
},
this.bufferPool
);

View File

@@ -1,5 +1,7 @@
/** biome-ignore-all lint/suspicious/noTemplateCurlyInString: The metric templates requires to have `${}` in a normal string */
import {NotReorgedReason} from "@lodestar/fork-choice";
import {ArchiveStoreTask} from "../../chain/archiveStore/archiveStore.js";
import {FrequencyStateArchiveStep} from "../../chain/archiveStore/strategies/frequencyStateArchiveStrategy.js";
import {BlockInputSource} from "../../chain/blocks/blockInput/index.js";
import {JobQueueItemType} from "../../chain/bls/index.js";
import {AttestationErrorCode, BlockErrorCode} from "../../chain/errors/index.js";
@@ -1420,6 +1422,21 @@ export function createLodestarMetrics(
},
},
processFinalizedCheckpoint: {
durationByTask: register.histogram<{source: ArchiveStoreTask}>({
name: "lodestar_process_finalized_checkpoint_seconds",
help: "Histogram of time to process finalized checkpoint",
buckets: [0.1, 0.5, 1, 2, 4, 8],
labelNames: ["source"],
}),
frequencyStateArchive: register.histogram<{step: FrequencyStateArchiveStep}>({
name: "lodestar_process_finalized_checkpoint_frequency_state_archive_seconds",
help: "Histogram of FrequencyStateArchive duration by step",
buckets: [0.1, 0.5, 1, 2, 4, 8],
labelNames: ["step"],
}),
},
regenFnCallTotal: register.gauge<{entrypoint: RegenFnName; caller: RegenCaller}>({
name: "lodestar_regen_fn_call_total",
help: "Total number of calls for regen functions",