Add more range sync metrics (#3803)

* Add more sync metrics

* Bump to 8.4.2

* Lock Grafana version

* Add Sync - Range charts

* Set exemplar false
This commit is contained in:
Lion - dapplion
2022-02-28 22:39:26 +05:30
committed by GitHub
parent e175d12665
commit 0efbf1d671
8 changed files with 874 additions and 292 deletions

View File

@@ -1,4 +1,5 @@
FROM grafana/grafana:latest
# Same version as our ansible deployments, to minimize the diff in the dashboard on export
FROM grafana/grafana:8.4.2
COPY provisioning/ /etc/grafana/provisioning/
COPY provisioning/dashboards/*.json /provisioning/dashboards/

File diff suppressed because it is too large Load Diff

View File

@@ -369,15 +369,43 @@ export function createLodestarMetrics(
// Sync
syncChainsStarted: register.gauge<"syncType">({
name: "lodestar_sync_chains_started_total",
help: "Total number of sync chains started events, labeled by syncType",
labelNames: ["syncType"],
}),
syncStatus: register.gauge({
name: "lodestar_sync_status",
help: "Range sync status: [Stalled, SyncingFinalized, SyncingHead, Synced]",
}),
syncPeersBySyncType: register.gauge<"syncType">({
name: "lodestar_sync_range_sync_peers",
help: "Count of peers by sync type [FullySynced, Advanced, Behind]",
labelNames: ["syncType"],
}),
syncSwitchGossipSubscriptions: register.gauge<"action">({
name: "lodestar_sync_switch_gossip_subscriptions",
help: "Sync switched gossip subscriptions on/off",
labelNames: ["action"],
}),
// Range sync metrics, grouped under `syncRange`
syncRange: {
  // Incremented on sync chain lifecycle events; callers in range sync use the
  // event labels "add", "remove", "start" and "stop"
  syncChainsEvents: register.gauge<"syncType" | "event">({
    name: "lodestar_sync_chains_events_total",
    help: "Total number of sync chains events, labeled by syncType and event",
    labelNames: ["syncType", "event"],
  }),
  // Number of sync chains currently tracked, per sync type
  syncChains: register.gauge<"syncType">({
    name: "lodestar_sync_chains_count",
    help: "Count of sync chains by syncType",
    labelNames: ["syncType"],
  }),
  // Avg / min / max (and sum) of the per-chain peer counts, per sync type
  syncChainsPeers: register.avgMinMax<"syncType">({
    name: "lodestar_sync_chains_peer_count_by_type",
    help: "Count of sync chain peers by syncType",
    labelNames: ["syncType"],
  }),
  // Set to the target slot of a sync chain when it completes without error
  syncChainHighestTargetSlotCompleted: register.gauge({
    name: "lodestar_sync_chain_highest_target_slot_completed",
    help: "Highest target slot completed by a sync chain",
  }),
},
syncUnknownBlock: {
requests: register.gauge({
name: "lodestar_sync_unknown_block_requests_total",

View File

@@ -2,11 +2,13 @@ import {GaugeConfiguration} from "prom-client";
import {GaugeExtra} from "./gauge";
type GetValuesFn = () => number[];
type Labels<T extends string> = Partial<Record<T, string | number>>;
/**
* Special non-standard "Histogram" that captures the avg, min and max of values
*/
export class AvgMinMax<T extends string> {
private readonly sum: GaugeExtra<string>;
private readonly avg: GaugeExtra<string>;
private readonly min: GaugeExtra<string>;
private readonly max: GaugeExtra<string>;
@@ -14,6 +16,7 @@ export class AvgMinMax<T extends string> {
private getValuesFn: GetValuesFn | null = null;
constructor(configuration: GaugeConfiguration<T>) {
this.sum = new GaugeExtra({...configuration, name: `${configuration.name}_sum`});
this.avg = new GaugeExtra({...configuration, name: `${configuration.name}_avg`});
this.min = new GaugeExtra({...configuration, name: `${configuration.name}_min`});
this.max = new GaugeExtra({...configuration, name: `${configuration.name}_max`});
@@ -29,11 +32,25 @@ export class AvgMinMax<T extends string> {
}
}
/**
 * Record the statistics of `values` on the underlying sum / avg / min / max gauges.
 *
 * - `set(values)` writes the un-labeled series.
 * - `set(labels, values)` writes the series identified by `labels`.
 *
 * The statistics come from `getStats`, so an empty `values` array is recorded as zeros.
 */
set(values: number[]): void;
set(labels: Labels<T>, values: number[]): void;
set(arg1?: Labels<T> | number[], arg2?: number[]): void {
  if (arg2 === undefined) {
    // Single-argument form: the only argument is the values array
    const {sum, avg, min, max} = getStats(arg1 as number[]);
    this.sum.set(sum);
    this.avg.set(avg);
    this.min.set(min);
    this.max.set(max);
  } else {
    // Two-argument form: arg1 carries the labels, arg2 the values
    const labels = arg1 as Labels<T>;
    const {sum, avg, min, max} = getStats(arg2);
    this.sum.set(labels, sum);
    this.avg.set(labels, avg);
    this.min.set(labels, min);
    this.max.set(labels, max);
  }
}
private onCollect = (): void => {
@@ -44,6 +61,7 @@ export class AvgMinMax<T extends string> {
}
type ArrStatistics = {
sum: number;
avg: number;
min: number;
max: number;
@@ -51,19 +69,19 @@ type ArrStatistics = {
/**
 * Compute the sum, average, minimum and maximum of `values` in a single pass.
 *
 * @param values numbers to summarize
 * @returns `{sum, avg, min, max}`; all four are 0 when `values` is empty,
 * which avoids a division by zero for `avg`
 */
function getStats(values: number[]): ArrStatistics {
  if (values.length < 1) {
    return {sum: 0, avg: 0, min: 0, max: 0};
  }

  // Seed every accumulator with the first element, then fold in the rest
  let min = values[0];
  let max = values[0];
  let sum = values[0];

  for (let i = 1; i < values.length; i++) {
    const val = values[i];
    if (val < min) min = val;
    if (val > max) max = val;
    sum += val;
  }

  return {sum, avg: sum / values.length, min, max};
}

View File

@@ -44,7 +44,7 @@ export type SyncChainFns = {
/** Report peer for negative actions. Decouples from the full network instance */
reportPeer: (peer: PeerId, action: PeerAction, actionName: string) => void;
/** Hook called when Chain state completes */
onEnd: (err?: Error) => void;
onEnd: (err: Error | null, target: ChainTarget | null) => void;
};
/**
@@ -86,7 +86,10 @@ export class SyncChain {
/** Short string id to identify this SyncChain in logs */
readonly logId: string;
readonly syncType: RangeSyncType;
/** Should sync up until this slot, then stop */
/**
* Should sync up until this slot, then stop.
* Finalized SyncChains have a dynamic target, so if this chain has no peers the target can become null
*/
target: ChainTarget | null = null;
/** Number of validated epochs. For the SyncRange to prevent switching chains too fast */
@@ -128,8 +131,8 @@ export class SyncChain {
// Trigger event on parent class
this.sync().then(
() => fns.onEnd(),
(e) => fns.onEnd(e)
() => fns.onEnd(null, this.target),
(e) => fns.onEnd(e, null)
);
}

View File

@@ -8,7 +8,7 @@ import {ILogger} from "@chainsafe/lodestar-utils";
import {IBeaconChain} from "../../chain";
import {INetwork} from "../../network";
import {IMetrics} from "../../metrics";
import {RangeSyncType, getRangeSyncType} from "../utils/remoteSyncType";
import {RangeSyncType, getRangeSyncType, rangeSyncTypes} from "../utils/remoteSyncType";
import {updateChains, shouldRemoveChain} from "./utils";
import {ChainTarget, SyncChainFns, SyncChain, SyncChainOpts, SyncChainDebugState} from "./chain";
import {PartiallyVerifiedBlockFlags} from "../../chain/blocks";
@@ -86,12 +86,17 @@ export class RangeSync extends (EventEmitter as {new (): RangeSyncEmitter}) {
/**
 * Store the injected modules and options. When a metrics instance is provided,
 * also register a collect callback on the `syncStatus` metric that runs `scrapeMetrics`.
 *
 * @param modules chain, network, metrics, config and logger dependencies
 * @param opts optional range sync options, later forwarded to new SyncChain instances
 */
constructor(modules: RangeSyncModules, opts?: RangeSyncOpts) {
  super();
  const {chain, network, metrics, config, logger} = modules;
  this.chain = chain;
  this.network = network;
  this.metrics = metrics;
  this.config = config;
  this.logger = logger;
  this.opts = opts;

  // Metrics are optional; only hook the collector when they are enabled
  if (metrics) {
    metrics.syncStatus.addCollect(() => this.scrapeMetrics(metrics));
  }
}
/** Throw / return all AsyncGenerators inside every SyncChain instance */
@@ -215,10 +220,13 @@ export class RangeSync extends (EventEmitter as {new (): RangeSyncEmitter}) {
};
/**
 * `onEnd` hook for `SyncChain`.
 *
 * Re-runs `update()` with the fork-choice finalized epoch, emits
 * `RangeSyncEvent.completedChain`, and, when the chain ended without error and
 * reported a target, records that target's slot in the metrics.
 *
 * @param err error the chain ended with, or `null` on success
 * @param target target the chain completed, or `null` if none was reported
 */
private onSyncChainEnd: SyncChainFns["onEnd"] = (err, target) => {
  this.update(this.chain.forkChoice.getFinalizedCheckpoint().epoch);
  this.emit(RangeSyncEvent.completedChain);

  if (err === null && target !== null) {
    // NOTE(review): the gauge is overwritten with the most recently completed target slot;
    // no max is taken here despite the "highest" name — confirm this is intended
    this.metrics?.syncRange.syncChainHighestTargetSlotCompleted.set(target.slot);
  }
};
private addPeerOrCreateChain(startEpoch: Epoch, target: ChainTarget, peer: PeerId, syncType: RangeSyncType): void {
@@ -237,7 +245,8 @@ export class RangeSync extends (EventEmitter as {new (): RangeSyncEmitter}) {
this.opts
);
this.chains.set(syncType, syncChain);
this.logger.verbose("New syncChain", {syncType});
this.logger.verbose("Added syncChain", {syncType});
this.metrics?.syncRange.syncChainsEvents.inc({syncType: syncChain.syncType, event: "add"});
}
syncChain.addPeer(peer, target);
@@ -252,6 +261,7 @@ export class RangeSync extends (EventEmitter as {new (): RangeSyncEmitter}) {
syncChain.remove();
this.chains.delete(id);
this.logger.debug("Removed syncChain", {id: syncChain.logId});
this.metrics?.syncRange.syncChainsEvents.inc({syncType: syncChain.syncType, event: "remove"});
// Re-status peers from successful chain. Potentially trigger a Head sync
this.network.reStatusPeers(syncChain.getPeers());
@@ -262,11 +272,38 @@ export class RangeSync extends (EventEmitter as {new (): RangeSyncEmitter}) {
for (const syncChain of toStop) {
syncChain.stopSyncing();
if (syncChain.isSyncing) {
this.metrics?.syncRange.syncChainsEvents.inc({syncType: syncChain.syncType, event: "stop"});
}
}
for (const syncChain of toStart) {
syncChain.startSyncing(localFinalizedEpoch);
if (!syncChain.isSyncing) this.metrics?.syncChainsStarted.inc({syncType: syncChain.syncType});
if (!syncChain.isSyncing) {
this.metrics?.syncRange.syncChainsEvents.inc({syncType: syncChain.syncType, event: "start"});
}
}
}
/**
 * Refresh the range sync gauges from the current set of sync chains:
 * the number of chains per sync type and the peer counts of those chains.
 */
private scrapeMetrics(metrics: IMetrics): void {
  // One entry per tracked SyncChain holding its `peers` value, bucketed by sync type.
  // Both buckets always exist so every sync type is reported, even with zero chains.
  const chainPeerCounts: Record<RangeSyncType, number[]> = {
    [RangeSyncType.Head]: [],
    [RangeSyncType.Finalized]: [],
  };

  for (const syncChain of this.chains.values()) {
    chainPeerCounts[syncChain.syncType].push(syncChain.peers);
  }

  for (const syncType of rangeSyncTypes) {
    const peerCounts = chainPeerCounts[syncType];
    // Each chain contributed exactly one entry, so the bucket length is the chain count
    metrics.syncRange.syncChains.set({syncType}, peerCounts.length);
    metrics.syncRange.syncChainsPeers.set({syncType}, peerCounts);
  }
}
}

View File

@@ -4,9 +4,10 @@ import {INetwork, NetworkEvent} from "../network";
import {ILogger} from "@chainsafe/lodestar-utils";
import {SLOTS_PER_EPOCH} from "@chainsafe/lodestar-params";
import {Slot, phase0} from "@chainsafe/lodestar-types";
import {IMetrics} from "../metrics";
import {ChainEvent, IBeaconChain} from "../chain";
import {RangeSync, RangeSyncStatus, RangeSyncEvent} from "./range/range";
import {getPeerSyncType, PeerSyncType} from "./utils/remoteSyncType";
import {getPeerSyncType, PeerSyncType, peerSyncTypes} from "./utils/remoteSyncType";
import {MIN_EPOCH_TO_START_GOSSIP} from "./constants";
import {SyncState, SyncChainDebugState, syncStateMetric} from "./interface";
import {SyncOptions} from "./options";
@@ -16,11 +17,15 @@ export class BeaconSync implements IBeaconSync {
private readonly logger: ILogger;
private readonly network: INetwork;
private readonly chain: IBeaconChain;
private readonly metrics: IMetrics | null;
private readonly opts: SyncOptions;
private readonly rangeSync: RangeSync;
private readonly unknownBlockSync: UnknownBlockSync;
/** For metrics only */
private readonly peerSyncType = new Map<string, PeerSyncType>();
/**
* The number of slots ahead of us that is allowed before starting a RangeSync
* If a peer is within this tolerance (forwards or backwards), it is treated as a fully sync'd peer.
@@ -36,6 +41,7 @@ export class BeaconSync implements IBeaconSync {
this.opts = opts;
this.network = network;
this.chain = chain;
this.metrics = metrics;
this.logger = logger;
this.rangeSync = new RangeSync(modules, opts);
this.unknownBlockSync = new UnknownBlockSync(config, network, chain, logger, metrics, opts);
@@ -51,7 +57,7 @@ export class BeaconSync implements IBeaconSync {
this.chain.emitter.on(ChainEvent.clockEpoch, this.onClockEpoch);
if (metrics) {
metrics.syncStatus.addCollect(() => metrics.syncStatus.set(syncStateMetric[this.state]));
metrics.syncStatus.addCollect(() => this.scrapeMetrics(metrics));
}
}
@@ -144,6 +150,9 @@ export class BeaconSync implements IBeaconSync {
const localStatus = this.chain.getStatus();
const syncType = getPeerSyncType(localStatus, peerStatus, this.chain.forkChoice, this.slotImportTolerance);
// For metrics only
this.peerSyncType.set(peerId.toB58String(), syncType);
if (syncType === PeerSyncType.Advanced) {
this.rangeSync.addPeer(peerId, localStatus, peerStatus);
}
@@ -156,6 +165,8 @@ export class BeaconSync implements IBeaconSync {
*/
private removePeer = (peerId: PeerId): void => {
  // Drop the peer from range sync first
  this.rangeSync.removePeer(peerId);

  // Then forget its recorded sync type (that map is kept for metrics only)
  const peerIdStr = peerId.toB58String();
  this.peerSyncType.delete(peerIdStr);
};
/**
@@ -171,6 +182,7 @@ export class BeaconSync implements IBeaconSync {
this.chain.clock.currentSlot >= MIN_EPOCH_TO_START_GOSSIP
) {
this.network.subscribeGossipCoreTopics();
this.metrics?.syncSwitchGossipSubscriptions.inc({action: "subscribed"});
this.logger.info("Subscribed gossip core topics");
}
@@ -180,6 +192,7 @@ export class BeaconSync implements IBeaconSync {
if (syncDiff > this.slotImportTolerance * 2) {
this.logger.warn(`Node sync has fallen behind by ${syncDiff} slots`);
this.network.unsubscribeGossipCoreTopics();
this.metrics?.syncSwitchGossipSubscriptions.inc({action: "unsubscribed"});
this.logger.info("Un-subscribed gossip core topics");
}
}
@@ -192,4 +205,23 @@ export class BeaconSync implements IBeaconSync {
// by realizing it's way behind and turning gossip off.
this.updateSyncState();
};
/**
 * Refresh the sync gauges: the current sync state and the number of
 * tracked peers per peer sync type.
 */
private scrapeMetrics(metrics: IMetrics): void {
  // Publish the current sync state through its mapped metric value
  const stateValue = syncStateMetric[this.state];
  metrics.syncStatus.set(stateValue);

  // Tally tracked peers per sync type; every type starts at zero so each label is always reported
  const tally: Record<PeerSyncType, number> = {
    [PeerSyncType.FullySynced]: 0,
    [PeerSyncType.Advanced]: 0,
    [PeerSyncType.Behind]: 0,
  };
  this.peerSyncType.forEach((peerType) => {
    tally[peerType] += 1;
  });

  peerSyncTypes.forEach((syncType) => {
    metrics.syncPeersBySyncType.set({syncType}, tally[syncType]);
  });
}
}

View File

@@ -11,6 +11,9 @@ export enum PeerSyncType {
Behind = "Behind",
}
// Cache the list of PeerSyncType members once, for faster loops in metrics.
// NOTE(review): Object.keys returns the enum member names; the cast assumes every
// member name equals its string value — confirm against the enum declaration
export const peerSyncTypes = Object.keys(PeerSyncType) as PeerSyncType[];
export function getPeerSyncType(
local: phase0.Status,
remote: phase0.Status,
@@ -73,6 +76,9 @@ export enum RangeSyncType {
Head = "Head",
}
// Cache the list of RangeSyncType members once, for faster loops in metrics.
// NOTE(review): Object.keys returns the enum member names; the cast assumes every
// member name equals its string value — confirm against the enum declaration
export const rangeSyncTypes = Object.keys(RangeSyncType) as RangeSyncType[];
/**
* Check if a peer requires a finalized chain sync. Only if:
* - The remote's finalized epoch is greater than our current finalized epoch and we have