mirror of
https://github.com/ChainSafe/lodestar.git
synced 2026-01-09 15:48:08 -05:00
feat: track performance of processFinalizedCheckpoint() (#8530)
**Motivation** - investigate and maintain the performance of `processFinalizedCheckpoint()` - this is part of #8526 **Description** - track duration of `processFinalizedCheckpoint()` by tasks, the result on a hoodi node, it shows that `FrequencyStateArchiveStrategy` takes the most time <img width="941" height="297" alt="Screenshot 2025-10-14 at 13 45 38" src="https://github.com/user-attachments/assets/ef440399-538b-4a4a-a63c-e775745b25e6" /> - track different steps of `FrequencyStateArchiveStrategy`, the result shows that the mainthread is blocked by different db queries cc @wemeetagain <img width="1291" height="657" alt="Screenshot 2025-10-14 at 13 46 36" src="https://github.com/user-attachments/assets/3b19f008-c7d8-49a4-9dc5-e68b1a5ba2a5" /> part of #8526 --------- Co-authored-by: Tuyen Nguyen <twoeths@users.noreply.github.com>
This commit is contained in:
@@ -25,6 +25,16 @@ type ArchiveStoreModules = {
|
||||
|
||||
type ArchiveStoreInitOpts = ArchiveStoreOpts & {dbName: string; anchorState: {finalizedCheckpoint: Checkpoint}};
|
||||
|
||||
export enum ArchiveStoreTask {
|
||||
ArchiveBlocks = "archive_blocks",
|
||||
PruneHistory = "prune_history",
|
||||
OnFinalizedCheckpoint = "on_finalized_checkpoint",
|
||||
MaybeArchiveState = "maybe_archive_state",
|
||||
RegenPruneOnFinalized = "regen_prune_on_finalized",
|
||||
ForkchoicePrune = "forkchoice_prune",
|
||||
UpdateBackfillRange = "update_backfill_range",
|
||||
}
|
||||
|
||||
/**
|
||||
* Used for running tasks that depends on some events or are executed
|
||||
* periodically.
|
||||
@@ -176,6 +186,8 @@ export class ArchiveStore {
|
||||
try {
|
||||
const finalizedEpoch = finalized.epoch;
|
||||
this.logger.verbose("Start processing finalized checkpoint", {epoch: finalizedEpoch, rootHex: finalized.rootHex});
|
||||
|
||||
let timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
|
||||
await archiveBlocks(
|
||||
this.chain.config,
|
||||
this.db,
|
||||
@@ -188,7 +200,10 @@ export class ArchiveStore {
|
||||
this.chain.opts.persistOrphanedBlocks,
|
||||
this.chain.opts.persistOrphanedBlocksDir
|
||||
);
|
||||
timer?.({source: ArchiveStoreTask.ArchiveBlocks});
|
||||
|
||||
if (this.opts.pruneHistory) {
|
||||
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
|
||||
await pruneHistory(
|
||||
this.chain.config,
|
||||
this.db,
|
||||
@@ -197,18 +212,30 @@ export class ArchiveStore {
|
||||
finalizedEpoch,
|
||||
this.chain.clock.currentEpoch
|
||||
);
|
||||
timer?.({source: ArchiveStoreTask.PruneHistory});
|
||||
}
|
||||
|
||||
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
|
||||
await this.statesArchiverStrategy.onFinalizedCheckpoint(finalized, this.metrics);
|
||||
timer?.({source: ArchiveStoreTask.OnFinalizedCheckpoint});
|
||||
|
||||
// should be after ArchiveBlocksTask to handle restart cleanly
|
||||
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
|
||||
await this.statesArchiverStrategy.maybeArchiveState(finalized, this.metrics);
|
||||
timer?.({source: ArchiveStoreTask.MaybeArchiveState});
|
||||
|
||||
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
|
||||
this.chain.regen.pruneOnFinalized(finalizedEpoch);
|
||||
timer?.({source: ArchiveStoreTask.RegenPruneOnFinalized});
|
||||
|
||||
// tasks rely on extended fork choice
|
||||
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
|
||||
const prunedBlocks = this.chain.forkChoice.prune(finalized.rootHex);
|
||||
timer?.({source: ArchiveStoreTask.ForkchoicePrune});
|
||||
|
||||
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
|
||||
await updateBackfillRange({chain: this.chain, db: this.db, logger: this.logger}, finalized);
|
||||
timer?.({source: ArchiveStoreTask.UpdateBackfillRange});
|
||||
|
||||
this.logger.verbose("Finish processing finalized checkpoint", {
|
||||
epoch: finalizedEpoch,
|
||||
|
||||
@@ -17,6 +17,15 @@ import {StateArchiveStrategy, StatesArchiveOpts} from "../interface.js";
|
||||
*/
|
||||
export const PERSIST_TEMP_STATE_EVERY_EPOCHS = 32;
|
||||
|
||||
export enum FrequencyStateArchiveStep {
|
||||
LoadLastStoredSlot = "load_last_stored_slot",
|
||||
GetFinalizedState = "get_finalized_state",
|
||||
// SerializeState is tracked via stateSerializeDuration metric
|
||||
PersistState = "persist_state",
|
||||
LoadStoredSlotsToDelete = "load_stored_slots_to_delete",
|
||||
DeleteOldStates = "delete_old_states",
|
||||
}
|
||||
|
||||
/**
|
||||
* Archives finalized states from active bucket to archive bucket.
|
||||
*
|
||||
@@ -47,7 +56,10 @@ export class FrequencyStateArchiveStrategy implements StateArchiveStrategy {
|
||||
* ```
|
||||
*/
|
||||
async maybeArchiveState(finalized: CheckpointWithHex, metrics?: Metrics | null): Promise<void> {
|
||||
let timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
|
||||
const lastStoredSlot = await this.db.stateArchive.lastKey();
|
||||
timer?.({step: FrequencyStateArchiveStep.LoadLastStoredSlot});
|
||||
|
||||
const lastStoredEpoch = computeEpochAtSlot(lastStoredSlot ?? 0);
|
||||
const {archiveStateEpochFrequency} = this.opts;
|
||||
|
||||
@@ -60,15 +72,19 @@ export class FrequencyStateArchiveStrategy implements StateArchiveStrategy {
|
||||
(Math.floor(finalized.epoch / archiveStateEpochFrequency) - 1) * archiveStateEpochFrequency
|
||||
);
|
||||
|
||||
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
|
||||
const storedStateSlots = await this.db.stateArchive.keys({
|
||||
lt: computeStartSlotAtEpoch(finalized.epoch),
|
||||
gte: computeStartSlotAtEpoch(minEpoch),
|
||||
});
|
||||
timer?.({step: FrequencyStateArchiveStep.LoadStoredSlotsToDelete});
|
||||
|
||||
const statesSlotsToDelete = computeStateSlotsToDelete(storedStateSlots, archiveStateEpochFrequency);
|
||||
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
|
||||
if (statesSlotsToDelete.length > 0) {
|
||||
await this.db.stateArchive.batchDelete(statesSlotsToDelete);
|
||||
}
|
||||
timer?.({step: FrequencyStateArchiveStep.DeleteOldStates});
|
||||
|
||||
// More logs to investigate the rss spike issue https://github.com/ChainSafe/lodestar/issues/5591
|
||||
this.logger.verbose("Archived state completed", {
|
||||
@@ -86,24 +102,31 @@ export class FrequencyStateArchiveStrategy implements StateArchiveStrategy {
|
||||
*/
|
||||
async archiveState(finalized: CheckpointWithHex, metrics?: Metrics | null): Promise<void> {
|
||||
// starting from Mar 2024, the finalized state could be from disk or in memory
|
||||
let timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
|
||||
const finalizedStateOrBytes = await this.regen.getCheckpointStateOrBytes(finalized);
|
||||
timer?.({step: FrequencyStateArchiveStep.GetFinalizedState});
|
||||
|
||||
const {rootHex} = finalized;
|
||||
if (!finalizedStateOrBytes) {
|
||||
throw Error(`No state in cache for finalized checkpoint state epoch #${finalized.epoch} root ${rootHex}`);
|
||||
}
|
||||
if (finalizedStateOrBytes instanceof Uint8Array) {
|
||||
const slot = getStateSlotFromBytes(finalizedStateOrBytes);
|
||||
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
|
||||
await this.db.stateArchive.putBinary(slot, finalizedStateOrBytes);
|
||||
timer?.({step: FrequencyStateArchiveStep.PersistState});
|
||||
this.logger.verbose("Archived finalized state bytes", {epoch: finalized.epoch, slot, root: rootHex});
|
||||
} else {
|
||||
// serialize state using BufferPool if provided
|
||||
const timer = metrics?.stateSerializeDuration.startTimer({source: AllocSource.ARCHIVE_STATE});
|
||||
const sszTimer = metrics?.stateSerializeDuration.startTimer({source: AllocSource.ARCHIVE_STATE});
|
||||
await serializeState(
|
||||
finalizedStateOrBytes,
|
||||
AllocSource.ARCHIVE_STATE,
|
||||
(stateBytes) => {
|
||||
timer?.();
|
||||
return this.db.stateArchive.putBinary(finalizedStateOrBytes.slot, stateBytes);
|
||||
async (stateBytes) => {
|
||||
sszTimer?.();
|
||||
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
|
||||
await this.db.stateArchive.putBinary(finalizedStateOrBytes.slot, stateBytes);
|
||||
timer?.({step: FrequencyStateArchiveStep.PersistState});
|
||||
},
|
||||
this.bufferPool
|
||||
);
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
/** biome-ignore-all lint/suspicious/noTemplateCurlyInString: The metric templates requires to have `${}` in a normal string */
|
||||
import {NotReorgedReason} from "@lodestar/fork-choice";
|
||||
import {ArchiveStoreTask} from "../../chain/archiveStore/archiveStore.js";
|
||||
import {FrequencyStateArchiveStep} from "../../chain/archiveStore/strategies/frequencyStateArchiveStrategy.js";
|
||||
import {BlockInputSource} from "../../chain/blocks/blockInput/index.js";
|
||||
import {JobQueueItemType} from "../../chain/bls/index.js";
|
||||
import {AttestationErrorCode, BlockErrorCode} from "../../chain/errors/index.js";
|
||||
@@ -1420,6 +1422,21 @@ export function createLodestarMetrics(
|
||||
},
|
||||
},
|
||||
|
||||
processFinalizedCheckpoint: {
|
||||
durationByTask: register.histogram<{source: ArchiveStoreTask}>({
|
||||
name: "lodestar_process_finalized_checkpoint_seconds",
|
||||
help: "Histogram of time to process finalized checkpoint",
|
||||
buckets: [0.1, 0.5, 1, 2, 4, 8],
|
||||
labelNames: ["source"],
|
||||
}),
|
||||
frequencyStateArchive: register.histogram<{step: FrequencyStateArchiveStep}>({
|
||||
name: "lodestar_process_finalized_checkpoint_frequency_state_archive_seconds",
|
||||
help: "Histogram of FrequencyStateArchive duration by step",
|
||||
buckets: [0.1, 0.5, 1, 2, 4, 8],
|
||||
labelNames: ["step"],
|
||||
}),
|
||||
},
|
||||
|
||||
regenFnCallTotal: register.gauge<{entrypoint: RegenFnName; caller: RegenCaller}>({
|
||||
name: "lodestar_regen_fn_call_total",
|
||||
help: "Total number of calls for regen functions",
|
||||
|
||||
Reference in New Issue
Block a user