mirror of
https://github.com/ChainSafe/lodestar.git
synced 2026-01-10 08:08:16 -05:00
Add more range sync metrics (#3803)
* Add more sync metrics * Bump to 8.4.2 * Lock Grafana version * Add Sync - Range charts * Set exemplar false
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
FROM grafana/grafana:latest
|
||||
# Same version as our ansible deployments, to minimize the diff in the dashboard on export
|
||||
FROM grafana/grafana:8.4.2
|
||||
|
||||
COPY provisioning/ /etc/grafana/provisioning/
|
||||
COPY provisioning/dashboards/*.json /provisioning/dashboards/
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -369,15 +369,43 @@ export function createLodestarMetrics(
|
||||
|
||||
// Sync
|
||||
|
||||
syncChainsStarted: register.gauge<"syncType">({
|
||||
name: "lodestar_sync_chains_started_total",
|
||||
help: "Total number of sync chains started events, labeled by syncType",
|
||||
labelNames: ["syncType"],
|
||||
}),
|
||||
syncStatus: register.gauge({
|
||||
name: "lodestar_sync_status",
|
||||
help: "Range sync status: [Stalled, SyncingFinalized, SyncingHead, Synced]",
|
||||
}),
|
||||
syncPeersBySyncType: register.gauge<"syncType">({
|
||||
name: "lodestar_sync_range_sync_peers",
|
||||
help: "Count of peers by sync type [FullySynced, Advanced, Behind]",
|
||||
labelNames: ["syncType"],
|
||||
}),
|
||||
syncSwitchGossipSubscriptions: register.gauge<"action">({
|
||||
name: "lodestar_sync_switch_gossip_subscriptions",
|
||||
help: "Sync switched gossip subscriptions on/off",
|
||||
labelNames: ["action"],
|
||||
}),
|
||||
|
||||
syncRange: {
|
||||
// Incremented on sync chain lifecycle events ("add", "remove", "start", "stop"),
// so the value is a running count per (syncType, event) label pair.
syncChainsEvents: register.gauge<"syncType" | "event">({
  name: "lodestar_sync_chains_events_total",
  help: "Total number of sync chains events, labeled by syncType and event",
  labelNames: ["syncType", "event"],
}),
|
||||
syncChains: register.gauge<"syncType">({
|
||||
name: "lodestar_sync_chains_count",
|
||||
help: "Count of sync chains by syncType",
|
||||
labelNames: ["syncType"],
|
||||
}),
|
||||
syncChainsPeers: register.avgMinMax<"syncType">({
|
||||
name: "lodestar_sync_chains_peer_count_by_type",
|
||||
help: "Count of sync chain peers by syncType",
|
||||
labelNames: ["syncType"],
|
||||
}),
|
||||
syncChainHighestTargetSlotCompleted: register.gauge({
|
||||
name: "lodestar_sync_chain_highest_target_slot_completed",
|
||||
help: "Highest target slot completed by a sync chain",
|
||||
}),
|
||||
},
|
||||
|
||||
syncUnknownBlock: {
|
||||
requests: register.gauge({
|
||||
name: "lodestar_sync_unknown_block_requests_total",
|
||||
|
||||
@@ -2,11 +2,13 @@ import {GaugeConfiguration} from "prom-client";
|
||||
import {GaugeExtra} from "./gauge";
|
||||
|
||||
type GetValuesFn = () => number[];
|
||||
type Labels<T extends string> = Partial<Record<T, string | number>>;
|
||||
|
||||
/**
|
||||
* Special non-standard "Histogram" that captures the sum, avg, min and max of values
|
||||
*/
|
||||
export class AvgMinMax<T extends string> {
|
||||
private readonly sum: GaugeExtra<string>;
|
||||
private readonly avg: GaugeExtra<string>;
|
||||
private readonly min: GaugeExtra<string>;
|
||||
private readonly max: GaugeExtra<string>;
|
||||
@@ -14,6 +16,7 @@ export class AvgMinMax<T extends string> {
|
||||
private getValuesFn: GetValuesFn | null = null;
|
||||
|
||||
constructor(configuration: GaugeConfiguration<T>) {
|
||||
this.sum = new GaugeExtra({...configuration, name: `${configuration.name}_sum`});
|
||||
this.avg = new GaugeExtra({...configuration, name: `${configuration.name}_avg`});
|
||||
this.min = new GaugeExtra({...configuration, name: `${configuration.name}_min`});
|
||||
this.max = new GaugeExtra({...configuration, name: `${configuration.name}_max`});
|
||||
@@ -29,11 +32,25 @@ export class AvgMinMax<T extends string> {
|
||||
}
|
||||
}
|
||||
|
||||
set(values: number[]): void {
|
||||
const {avg, min, max} = getStats(values);
|
||||
this.avg.set(avg);
|
||||
this.min.set(min);
|
||||
this.max.set(max);
|
||||
/**
 * Record the sum, avg, min and max of `values` on the four underlying gauges.
 *
 * Overloads:
 * - `set(values)` writes the statistics without labels.
 * - `set(labels, values)` writes the statistics under the given label set.
 */
set(values: number[]): void;
set(labels: Labels<T>, values: number[]): void;
set(arg1?: Labels<T> | number[], arg2?: number[]): void {
  if (arg2 === undefined) {
    // Single-argument form: the only argument is the values array
    const {sum, avg, min, max} = getStats(arg1 as number[]);
    this.sum.set(sum);
    this.avg.set(avg);
    this.min.set(min);
    this.max.set(max);
  } else {
    // Two-argument form: arg1 carries the labels, arg2 the values
    const labels = arg1 as Labels<T>;
    const {sum, avg, min, max} = getStats(arg2);
    this.sum.set(labels, sum);
    this.avg.set(labels, avg);
    this.min.set(labels, min);
    this.max.set(labels, max);
  }
}
|
||||
|
||||
private onCollect = (): void => {
|
||||
@@ -44,6 +61,7 @@ export class AvgMinMax<T extends string> {
|
||||
}
|
||||
|
||||
type ArrStatistics = {
|
||||
sum: number;
|
||||
avg: number;
|
||||
min: number;
|
||||
max: number;
|
||||
@@ -51,19 +69,19 @@ type ArrStatistics = {
|
||||
|
||||
/**
 * Compute the sum, average, minimum and maximum of `values` in a single pass.
 *
 * @param values - Numbers to summarize; may be empty.
 * @returns All four statistics. Every field is 0 when `values` is empty.
 */
function getStats(values: number[]): ArrStatistics {
  if (values.length < 1) {
    return {sum: 0, avg: 0, min: 0, max: 0};
  }

  // Seed every accumulator with the first element so the loop can start at index 1
  let min = values[0];
  let max = values[0];
  let sum = values[0];

  for (let i = 1; i < values.length; i++) {
    const val = values[i];
    if (val < min) min = val;
    if (val > max) max = val;
    sum += val;
  }

  return {sum, avg: sum / values.length, min, max};
}
|
||||
|
||||
@@ -44,7 +44,7 @@ export type SyncChainFns = {
|
||||
/** Report peer for negative actions. Decouples from the full network instance */
|
||||
reportPeer: (peer: PeerId, action: PeerAction, actionName: string) => void;
|
||||
/** Hook called when Chain state completes */
|
||||
onEnd: (err?: Error) => void;
|
||||
onEnd: (err: Error | null, target: ChainTarget | null) => void;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -86,7 +86,10 @@ export class SyncChain {
|
||||
/** Short string id to identify this SyncChain in logs */
|
||||
readonly logId: string;
|
||||
readonly syncType: RangeSyncType;
|
||||
/** Should sync up until this slot, then stop */
|
||||
/**
|
||||
* Should sync up until this slot, then stop.
|
||||
* Finalized SyncChains have a dynamic target, so if this chain has no peers the target can become null
|
||||
*/
|
||||
target: ChainTarget | null = null;
|
||||
|
||||
/** Number of validated epochs. For the SyncRange to prevent switching chains too fast */
|
||||
@@ -128,8 +131,8 @@ export class SyncChain {
|
||||
|
||||
// Trigger event on parent class
|
||||
this.sync().then(
|
||||
() => fns.onEnd(),
|
||||
(e) => fns.onEnd(e)
|
||||
() => fns.onEnd(null, this.target),
|
||||
(e) => fns.onEnd(e, null)
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import {ILogger} from "@chainsafe/lodestar-utils";
|
||||
import {IBeaconChain} from "../../chain";
|
||||
import {INetwork} from "../../network";
|
||||
import {IMetrics} from "../../metrics";
|
||||
import {RangeSyncType, getRangeSyncType} from "../utils/remoteSyncType";
|
||||
import {RangeSyncType, getRangeSyncType, rangeSyncTypes} from "../utils/remoteSyncType";
|
||||
import {updateChains, shouldRemoveChain} from "./utils";
|
||||
import {ChainTarget, SyncChainFns, SyncChain, SyncChainOpts, SyncChainDebugState} from "./chain";
|
||||
import {PartiallyVerifiedBlockFlags} from "../../chain/blocks";
|
||||
@@ -86,12 +86,17 @@ export class RangeSync extends (EventEmitter as {new (): RangeSyncEmitter}) {
|
||||
|
||||
constructor(modules: RangeSyncModules, opts?: RangeSyncOpts) {
|
||||
super();
|
||||
this.chain = modules.chain;
|
||||
this.network = modules.network;
|
||||
this.metrics = modules.metrics;
|
||||
this.config = modules.config;
|
||||
this.logger = modules.logger;
|
||||
const {chain, network, metrics, config, logger} = modules;
|
||||
this.chain = chain;
|
||||
this.network = network;
|
||||
this.metrics = metrics;
|
||||
this.config = config;
|
||||
this.logger = logger;
|
||||
this.opts = opts;
|
||||
|
||||
if (metrics) {
|
||||
metrics.syncStatus.addCollect(() => this.scrapeMetrics(metrics));
|
||||
}
|
||||
}
|
||||
|
||||
/** Throw / return all AsyncGenerators inside every SyncChain instance */
|
||||
@@ -215,10 +220,13 @@ export class RangeSync extends (EventEmitter as {new (): RangeSyncEmitter}) {
|
||||
};
|
||||
|
||||
/** Convenience method for `SyncChain` */
|
||||
private onSyncChainEnd: SyncChainFns["onEnd"] = () => {
|
||||
const localStatus = this.chain.getStatus();
|
||||
this.update(localStatus.finalizedEpoch);
|
||||
private onSyncChainEnd: SyncChainFns["onEnd"] = (err, target) => {
|
||||
this.update(this.chain.forkChoice.getFinalizedCheckpoint().epoch);
|
||||
this.emit(RangeSyncEvent.completedChain);
|
||||
|
||||
if (err === null && target !== null) {
|
||||
this.metrics?.syncRange.syncChainHighestTargetSlotCompleted.set(target.slot);
|
||||
}
|
||||
};
|
||||
|
||||
private addPeerOrCreateChain(startEpoch: Epoch, target: ChainTarget, peer: PeerId, syncType: RangeSyncType): void {
|
||||
@@ -237,7 +245,8 @@ export class RangeSync extends (EventEmitter as {new (): RangeSyncEmitter}) {
|
||||
this.opts
|
||||
);
|
||||
this.chains.set(syncType, syncChain);
|
||||
this.logger.verbose("New syncChain", {syncType});
|
||||
this.logger.verbose("Added syncChain", {syncType});
|
||||
this.metrics?.syncRange.syncChainsEvents.inc({syncType: syncChain.syncType, event: "add"});
|
||||
}
|
||||
|
||||
syncChain.addPeer(peer, target);
|
||||
@@ -252,6 +261,7 @@ export class RangeSync extends (EventEmitter as {new (): RangeSyncEmitter}) {
|
||||
syncChain.remove();
|
||||
this.chains.delete(id);
|
||||
this.logger.debug("Removed syncChain", {id: syncChain.logId});
|
||||
this.metrics?.syncRange.syncChainsEvents.inc({syncType: syncChain.syncType, event: "remove"});
|
||||
|
||||
// Re-status peers from successful chain. Potentially trigger a Head sync
|
||||
this.network.reStatusPeers(syncChain.getPeers());
|
||||
@@ -262,11 +272,38 @@ export class RangeSync extends (EventEmitter as {new (): RangeSyncEmitter}) {
|
||||
|
||||
for (const syncChain of toStop) {
|
||||
syncChain.stopSyncing();
|
||||
if (syncChain.isSyncing) {
|
||||
this.metrics?.syncRange.syncChainsEvents.inc({syncType: syncChain.syncType, event: "stop"});
|
||||
}
|
||||
}
|
||||
|
||||
for (const syncChain of toStart) {
|
||||
syncChain.startSyncing(localFinalizedEpoch);
|
||||
if (!syncChain.isSyncing) this.metrics?.syncChainsStarted.inc({syncType: syncChain.syncType});
|
||||
if (!syncChain.isSyncing) {
|
||||
this.metrics?.syncRange.syncChainsEvents.inc({syncType: syncChain.syncType, event: "start"});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Metrics collect callback: for every range sync type, publishes the number of
 * sync chains of that type and the per-chain `peers` values.
 */
private scrapeMetrics(metrics: IMetrics): void {
  // Start every type at zero / empty so types with no chains are still reported
  const chainCountByType: Record<RangeSyncType, number> = {
    [RangeSyncType.Finalized]: 0,
    [RangeSyncType.Head]: 0,
  };
  const chainPeersByType: Record<RangeSyncType, number[]> = {
    [RangeSyncType.Finalized]: [],
    [RangeSyncType.Head]: [],
  };

  this.chains.forEach((syncChain) => {
    chainPeersByType[syncChain.syncType].push(syncChain.peers);
    chainCountByType[syncChain.syncType] += 1;
  });

  rangeSyncTypes.forEach((syncType) => {
    metrics.syncRange.syncChains.set({syncType}, chainCountByType[syncType]);
    metrics.syncRange.syncChainsPeers.set({syncType}, chainPeersByType[syncType]);
  });
}
|
||||
}
|
||||
|
||||
@@ -4,9 +4,10 @@ import {INetwork, NetworkEvent} from "../network";
|
||||
import {ILogger} from "@chainsafe/lodestar-utils";
|
||||
import {SLOTS_PER_EPOCH} from "@chainsafe/lodestar-params";
|
||||
import {Slot, phase0} from "@chainsafe/lodestar-types";
|
||||
import {IMetrics} from "../metrics";
|
||||
import {ChainEvent, IBeaconChain} from "../chain";
|
||||
import {RangeSync, RangeSyncStatus, RangeSyncEvent} from "./range/range";
|
||||
import {getPeerSyncType, PeerSyncType} from "./utils/remoteSyncType";
|
||||
import {getPeerSyncType, PeerSyncType, peerSyncTypes} from "./utils/remoteSyncType";
|
||||
import {MIN_EPOCH_TO_START_GOSSIP} from "./constants";
|
||||
import {SyncState, SyncChainDebugState, syncStateMetric} from "./interface";
|
||||
import {SyncOptions} from "./options";
|
||||
@@ -16,11 +17,15 @@ export class BeaconSync implements IBeaconSync {
|
||||
private readonly logger: ILogger;
|
||||
private readonly network: INetwork;
|
||||
private readonly chain: IBeaconChain;
|
||||
private readonly metrics: IMetrics | null;
|
||||
private readonly opts: SyncOptions;
|
||||
|
||||
private readonly rangeSync: RangeSync;
|
||||
private readonly unknownBlockSync: UnknownBlockSync;
|
||||
|
||||
/** For metrics only */
|
||||
private readonly peerSyncType = new Map<string, PeerSyncType>();
|
||||
|
||||
/**
|
||||
* The number of slots ahead of us that is allowed before starting a RangeSync
|
||||
* If a peer is within this tolerance (forwards or backwards), it is treated as a fully sync'd peer.
|
||||
@@ -36,6 +41,7 @@ export class BeaconSync implements IBeaconSync {
|
||||
this.opts = opts;
|
||||
this.network = network;
|
||||
this.chain = chain;
|
||||
this.metrics = metrics;
|
||||
this.logger = logger;
|
||||
this.rangeSync = new RangeSync(modules, opts);
|
||||
this.unknownBlockSync = new UnknownBlockSync(config, network, chain, logger, metrics, opts);
|
||||
@@ -51,7 +57,7 @@ export class BeaconSync implements IBeaconSync {
|
||||
this.chain.emitter.on(ChainEvent.clockEpoch, this.onClockEpoch);
|
||||
|
||||
if (metrics) {
|
||||
metrics.syncStatus.addCollect(() => metrics.syncStatus.set(syncStateMetric[this.state]));
|
||||
metrics.syncStatus.addCollect(() => this.scrapeMetrics(metrics));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,6 +150,9 @@ export class BeaconSync implements IBeaconSync {
|
||||
const localStatus = this.chain.getStatus();
|
||||
const syncType = getPeerSyncType(localStatus, peerStatus, this.chain.forkChoice, this.slotImportTolerance);
|
||||
|
||||
// For metrics only
|
||||
this.peerSyncType.set(peerId.toB58String(), syncType);
|
||||
|
||||
if (syncType === PeerSyncType.Advanced) {
|
||||
this.rangeSync.addPeer(peerId, localStatus, peerStatus);
|
||||
}
|
||||
@@ -156,6 +165,8 @@ export class BeaconSync implements IBeaconSync {
|
||||
*/
|
||||
private removePeer = (peerId: PeerId): void => {
|
||||
this.rangeSync.removePeer(peerId);
|
||||
|
||||
this.peerSyncType.delete(peerId.toB58String());
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -171,6 +182,7 @@ export class BeaconSync implements IBeaconSync {
|
||||
this.chain.clock.currentSlot >= MIN_EPOCH_TO_START_GOSSIP
|
||||
) {
|
||||
this.network.subscribeGossipCoreTopics();
|
||||
this.metrics?.syncSwitchGossipSubscriptions.inc({action: "subscribed"});
|
||||
this.logger.info("Subscribed gossip core topics");
|
||||
}
|
||||
|
||||
@@ -180,6 +192,7 @@ export class BeaconSync implements IBeaconSync {
|
||||
if (syncDiff > this.slotImportTolerance * 2) {
|
||||
this.logger.warn(`Node sync has fallen behind by ${syncDiff} slots`);
|
||||
this.network.unsubscribeGossipCoreTopics();
|
||||
this.metrics?.syncSwitchGossipSubscriptions.inc({action: "unsubscribed"});
|
||||
this.logger.info("Un-subscribed gossip core topics");
|
||||
}
|
||||
}
|
||||
@@ -192,4 +205,23 @@ export class BeaconSync implements IBeaconSync {
|
||||
// by realizing it's way behind and turning gossip off.
|
||||
this.updateSyncState();
|
||||
};
|
||||
|
||||
/**
 * Metrics collect callback: reports the current sync state and the number of
 * tracked peers in each PeerSyncType.
 */
private scrapeMetrics(metrics: IMetrics): void {
  // Current sync state, mapped to its numeric metric value
  const stateValue = syncStateMetric[this.state];
  metrics.syncStatus.set(stateValue);

  // Tally tracked peers per sync type; every type starts at zero so it is always reported
  const tally: Record<PeerSyncType, number> = {
    [PeerSyncType.Advanced]: 0,
    [PeerSyncType.FullySynced]: 0,
    [PeerSyncType.Behind]: 0,
  };
  this.peerSyncType.forEach((peerType) => {
    tally[peerType] += 1;
  });

  peerSyncTypes.forEach((syncType) => {
    metrics.syncPeersBySyncType.set({syncType}, tally[syncType]);
  });
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,9 @@ export enum PeerSyncType {
|
||||
Behind = "Behind",
|
||||
}
|
||||
|
||||
// Cache Object.keys iteration for faster loops in metrics
|
||||
export const peerSyncTypes = Object.keys(PeerSyncType) as PeerSyncType[];
|
||||
|
||||
export function getPeerSyncType(
|
||||
local: phase0.Status,
|
||||
remote: phase0.Status,
|
||||
@@ -73,6 +76,9 @@ export enum RangeSyncType {
|
||||
Head = "Head",
|
||||
}
|
||||
|
||||
// Cache Object.keys iteration for faster loops in metrics
|
||||
export const rangeSyncTypes = Object.keys(RangeSyncType) as RangeSyncType[];
|
||||
|
||||
/**
|
||||
* Check if a peer requires a finalized chain sync. Only if:
|
||||
* - The remote's finalized epoch is greater than our current finalized epoch and we have
|
||||
|
||||
Reference in New Issue
Block a user