mirror of
https://github.com/ChainSafe/lodestar.git
synced 2026-01-09 15:48:08 -05:00
fix: add log and metrics to ColumnReconstructionTracker (#8433)
**Motivation** - we want more insight into the behavior of ColumnReconstructionTracker **Description** - add logs to ColumnReconstructionTracker - fix the metrics of ColumnReconstructionTracker: centralize them in one place and make them easier to render in Grafana - remove the unused error Closes #8402 --------- Co-authored-by: Tuyen Nguyen <twoeths@users.noreply.github.com> Co-authored-by: Cayman <caymannava@gmail.com>
This commit is contained in:
@@ -3,7 +3,7 @@ import {ChainEventEmitter} from "./emitter.js";
|
||||
import {Metrics} from "../metrics/metrics.js";
|
||||
import {ChainForkConfig} from "@lodestar/config";
|
||||
import {BlockInputColumns} from "./blocks/blockInput/index.js";
|
||||
import {recoverDataColumnSidecars} from "../util/dataColumns.js";
|
||||
import {DataColumnReconstructionCode, recoverDataColumnSidecars} from "../util/dataColumns.js";
|
||||
|
||||
/**
|
||||
* Minimum time to wait before attempting reconstruction
|
||||
@@ -64,9 +64,23 @@ export class ColumnReconstructionTracker {
|
||||
const delay =
|
||||
RECONSTRUCTION_DELAY_MIN_MS + Math.random() * (RECONSTRUCTION_DELAY_MAX_MS - RECONSTRUCTION_DELAY_MIN_MS);
|
||||
sleep(delay).then(() => {
|
||||
recoverDataColumnSidecars(blockInput, this.emitter, this.metrics).finally(() => {
|
||||
this.running = false;
|
||||
});
|
||||
const logCtx = {slot: blockInput.slot, root: blockInput.blockRootHex};
|
||||
this.logger.debug("Attempting data column sidecar reconstruction", logCtx);
|
||||
recoverDataColumnSidecars(blockInput, this.emitter, this.metrics)
|
||||
.then((result) => {
|
||||
this.metrics?.recoverDataColumnSidecars.reconstructionResult.inc({result});
|
||||
this.logger.debug("Data column sidecar reconstruction complete", {...logCtx, result});
|
||||
})
|
||||
.catch((e) => {
|
||||
this.metrics?.recoverDataColumnSidecars.reconstructionResult.inc({
|
||||
result: DataColumnReconstructionCode.Failed,
|
||||
});
|
||||
this.logger.debug("Error during data column sidecar reconstruction", logCtx, e as Error);
|
||||
})
|
||||
.finally(() => {
|
||||
this.logger.debug("Data column sidecar reconstruction attempt finished", logCtx);
|
||||
this.running = false;
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -814,7 +814,7 @@ export function createLodestarMetrics(
|
||||
name: "lodestar_recover_data_column_sidecar_recovered_columns_total",
|
||||
help: "Total number of columns that were recovered",
|
||||
}),
|
||||
reconstructionResult: register.gauge<{result: DataColumnReconstructionCode}>({
|
||||
reconstructionResult: register.counter<{result: DataColumnReconstructionCode}>({
|
||||
name: "lodestar_data_column_sidecars_reconstruction_result",
|
||||
help: "Data column sidecars reconstruction result",
|
||||
labelNames: ["result"],
|
||||
|
||||
@@ -14,7 +14,7 @@ import {
|
||||
fulu,
|
||||
} from "@lodestar/types";
|
||||
import {ssz} from "@lodestar/types";
|
||||
import {bytesToBigInt, LodestarError} from "@lodestar/utils";
|
||||
import {bytesToBigInt} from "@lodestar/utils";
|
||||
import {NodeId} from "../network/subnets/index.js";
|
||||
import {kzg} from "./kzg.js";
|
||||
import {dataColumnMatrixRecovery} from "./blobs.js";
|
||||
@@ -344,23 +344,17 @@ export async function recoverDataColumnSidecars(
|
||||
blockInput: BlockInputColumns,
|
||||
emitter: ChainEventEmitter,
|
||||
metrics: Metrics | null
|
||||
): Promise<void> {
|
||||
): Promise<DataColumnReconstructionCode> {
|
||||
const existingColumns = blockInput.getAllColumns();
|
||||
const columnCount = existingColumns.length;
|
||||
if (columnCount >= NUMBER_OF_COLUMNS) {
|
||||
// We have all columns
|
||||
metrics?.recoverDataColumnSidecars.reconstructionResult.inc({
|
||||
result: DataColumnReconstructionCode.NotAttemptedAlreadyFull,
|
||||
});
|
||||
return;
|
||||
return DataColumnReconstructionCode.NotAttemptedAlreadyFull;
|
||||
}
|
||||
|
||||
if (columnCount < NUMBER_OF_COLUMNS / 2) {
|
||||
// We don't have enough columns to recover
|
||||
metrics?.recoverDataColumnSidecars.reconstructionResult.inc({
|
||||
result: DataColumnReconstructionCode.NotAttemptedHaveLessThanHalf,
|
||||
});
|
||||
return;
|
||||
return DataColumnReconstructionCode.NotAttemptedHaveLessThanHalf;
|
||||
}
|
||||
|
||||
metrics?.recoverDataColumnSidecars.custodyBeforeReconstruction.set(columnCount);
|
||||
@@ -378,18 +372,12 @@ export async function recoverDataColumnSidecars(
|
||||
const fullSidecars = await dataColumnMatrixRecovery(partialSidecars).catch(() => null);
|
||||
timer?.();
|
||||
if (fullSidecars == null) {
|
||||
metrics?.recoverDataColumnSidecars.reconstructionResult.inc({
|
||||
result: DataColumnReconstructionCode.ReconstructionFailed,
|
||||
});
|
||||
return;
|
||||
return DataColumnReconstructionCode.NullReturned;
|
||||
}
|
||||
|
||||
if (blockInput.getAllColumns().length === NUMBER_OF_COLUMNS) {
|
||||
// either gossip or getBlobsV2 resolved availability while we were recovering
|
||||
metrics?.recoverDataColumnSidecars.reconstructionResult.inc({
|
||||
result: DataColumnReconstructionCode.ReceivedAllDuringReconstruction,
|
||||
});
|
||||
return;
|
||||
return DataColumnReconstructionCode.SuccessLate;
|
||||
}
|
||||
|
||||
// Once the node obtains a column through reconstruction,
|
||||
@@ -414,22 +402,14 @@ export async function recoverDataColumnSidecars(
|
||||
}
|
||||
emitter.emit(ChainEvent.publishDataColumns, sidecarsToPublish);
|
||||
|
||||
metrics?.recoverDataColumnSidecars.reconstructionResult.inc({result: DataColumnReconstructionCode.Success});
|
||||
return DataColumnReconstructionCode.SuccessResolved;
|
||||
}
|
||||
|
||||
export enum DataColumnReconstructionCode {
|
||||
NotAttemptedAlreadyFull = "DATA_COLUMN_RECONSTRUCTION_NOT_ATTEMPTED_ALREADY_FULL",
|
||||
NotAttemptedHaveLessThanHalf = "DATA_COLUMN_RECONSTRUCTION_NOT_ATTEMPTED_HAVE_LESS_THAN_HALF",
|
||||
ReconstructionFailed = "DATA_COLUMN_RECONSTRUCTION_RECONSTRUCTION_FAILED",
|
||||
ReceivedAllDuringReconstruction = "DATA_COLUMN_RECONSTRUCTION_RECEIVED_ALL_DURING_RECONSTRUCTION",
|
||||
Success = "DATA_COLUMN_RECONSTRUCTION_SUCCESS",
|
||||
NotAttemptedAlreadyFull = "not_attempted_full",
|
||||
NotAttemptedHaveLessThanHalf = "not_attempted_less_than_half",
|
||||
NullReturned = "null_returned",
|
||||
SuccessLate = "success_late",
|
||||
SuccessResolved = "success_resolved",
|
||||
Failed = "failed",
|
||||
}
|
||||
|
||||
type DataColumnReconstructionErrorType = {
|
||||
code:
|
||||
| DataColumnReconstructionCode.NotAttemptedHaveLessThanHalf
|
||||
| DataColumnReconstructionCode.ReceivedAllDuringReconstruction
|
||||
| DataColumnReconstructionCode.ReconstructionFailed;
|
||||
};
|
||||
|
||||
export class DataColumnReconstructionError extends LodestarError<DataColumnReconstructionErrorType> {}
|
||||
|
||||
Reference in New Issue
Block a user